www.aleph1.co.uk Git - yaffs-website/blob - web/modules/contrib/typogrify/src/SmartyPants.php

   1 <?php
   2
   3 namespace Drupal\typogrify;
   4
   5 /**
   6  * SmartyPants  -  Smart punctuation for web sites.
   7  *
   8  * By John Gruber <http://daringfireball.net>
   9  *
  10  * PHP port by Michel Fortin
  11  * <http://www.michelf.com/>
  12  *
  13  * Copyright (c) 2003-2004 John Gruber
  14  * Copyright (c) 2004-2005 Michel Fortin
  15  * Copyright (c) 2012 Micha Glave - i18n-quote-style
  16  *
  17  * Re-released under GPLv2 for Drupal.
  18  *
  19  * Wrapped in a class for D8.
  20  * Do we trust the GPLv2 notice above?
  21  */
  22 class SmartyPants {
  23
  24   /**
  25    * Fri 9 Dec 2005.
  26    */
  27   const SMARTYPANTS_PHP_VERSION = '1.5.1e';
  28
  29   /**
  30    * Fri 12 Mar 2004.
  31    */
  32   const SMARTYPANTS_SYNTAX_VERSION = '1.5.1';
  33
  34   /**
  35    * Regex-pattern for tags we don't mess with.
  36    */
  37   const SMARTYPANTS_TAGS_TO_SKIP
  38     = '@<(/?)(?:pre|code|kbd|script|textarea|tt|math)[\s>]@';
  39
  40   /**
  41    * Current SmartyPants configuration setting.
  42    *
  43    * Change this to configure.
  44    * 1 =>  "--" for em-dashes; no en-dash support
  45    * 2 =>  "---" for em-dashes; "--" for en-dashes
  46    * 3 =>  "--" for em-dashes; "---" for en-dashes
  47    * See docs for more configuration options.
  48    *
  49    * @var string
  50    */
  51   protected static $smartypantsAttr = "1";
  52
  53   /* -- Smarty Modifier Interface ------------------------------------------*/
  54
  /**
   * Smarty-style modifier entry point for SmartyPants.
   *
   * Thin wrapper that forwards both arguments unchanged to self::process().
   *
   * @param string $text
   *   Text to be parsed.
   * @param string|null $attr
   *   Transformation attribute handed on to self::process(); NULL by default.
   *
   * @return string
   *   Whatever self::process() returns for the given input.
   */
  public static function modifierSmartypants($text, $attr = NULL) {
    $processed = self::process($text, $attr);
    return $processed;
  }
  61
  /**
   * Looks up the quotation marks used for a language.
   *
   * Every entry holds four marks. process() indexes them as: 0 opening double
   * quote, 1 closing double quote, 2 opening single quote, 3 closing single
   * quote.
   *
   * @param string|null $langcode
   *   Language code to look up, or 'all' to get the whole table.
   *
   * @return array
   *   The four marks for $langcode, the complete table keyed by language code
   *   when $langcode is 'all', or the English marks for an unknown code.
   */
  public static function i18nQuotes($langcode = NULL) {
    // Languages that are not listed share the English marks via the fallback.
    $table = [
      // Arabic.
      'ar' => ['«', '»', '‹', '›'],
      // Belarusian.
      'be' => ['«', '»', '„', '“'],
      // Bulgarian.
      'bg' => ['„', '“', '‚', '‘'],
      // Danish.
      'da' => ['»', '«', '›', '‹'],
      // German.
      'de' => ['„', '“', '‚', '‘'],
      // Greek.
      'el' => ['«', '»', '‹', '›'],
      // English.
      'en' => ['“', '”', '‘', '’'],
      // Esperanto.
      'eo' => ['“', '”', '“', '”'],
      // Spanish.
      'es' => ['«', '»', '“', '“'],
      // Estonian.
      'et' => ['„', '“', '„', '“'],
      // Finnish.
      'fi' => ['”', '”', '’', '’'],
      // French.
      'fr' => ['«', '»', '‹', '›'],
      // Swiss German.
      'gsw-berne' => ['„', '“', '‚', '‘'],
      // Hebrew.
      'he' => ['“', '“', '«', '»'],
      // Croatian.
      'hr' => ['»', '«', '›', '‹'],
      // Hungarian.
      'hu' => ['„', '“', '„', '“'],
      // Icelandic.
      'is' => ['„', '“', '‚', '‘'],
      // Italian.
      'it' => ['«', '»', '‘', '’'],
      // Lithuanian.
      'lt' => ['„', '“', '‚', '‘'],
      // Latvian.
      'lv' => ['„', '“', '„', '“'],
      // Dutch.
      'nl' => ['„', '”', '‘', '’'],
      // Norwegian.
      'no' => ['„', '“', '„', '“'],
      // Polish.
      'pl' => ['„', '”', '«', '»'],
      // Portuguese.
      'pt' => ['“', '”', '‘', '’'],
      // Romanian.
      'ro' => ['„', '“', '«', '»'],
      // Russian.
      'ru' => ['«', '»', '„', '“'],
      // Slovak.
      'sk' => ['„', '“', '‚', '‘'],
      // Slovenian.
      'sl' => ['„', '“', '‚', '‘'],
      // Albanian.
      'sq' => ['«', '»', '‹', '›'],
      // Serbian.
      'sr' => ['„', '“', '‚', '‘'],
      // Swedish.
      'sv' => ['”', '”', '’', '’'],
      // Turkish.
      'tr' => ['«', '»', '‹', '›'],
      // Ukrainian.
      'uk' => ['«', '»', '„', '“'],
    ];

    if ($langcode == 'all') {
      return $table;
    }
    return isset($table[$langcode]) ? $table[$langcode] : $table['en'];
  }
 143
 /**
  * Applies the SmartyPants transformations to a chunk of HTML.
  *
  * The text is split with self::tokenizeHtml(). Tag tokens are copied through
  * untouched; text tokens are transformed unless they sit inside an element
  * matched by self::SMARTYPANTS_TAGS_TO_SKIP.
  *
  * @param string $text
  *   Text to be parsed.
  * @param string|null $attr
  *   Which transformations to run. NULL selects self::$smartypantsAttr.
  *   - "0": return $text unchanged.
  *   - "1": quotes, backticks, ellipses and "--" em-dashes.
  *   - "2": as "1", using the old-school dash shortcuts.
  *   - "3": as "1", using the inverted old-school dash shortcuts.
  *   - "-1": "stupefy" mode; text goes through self::stupefyEntities().
  *   - Anything else is read one character at a time: q/Q quotes,
  *     b/B backticks, d/D/i dashes, e ellipses, w convert &quot; entities.
  * @param array|null $ctx
  *   Optional context. Its 'langcode' entry, when present, selects the quote
  *   style for level-2 ("Q") quotes; English quotes are used otherwise.
  *
  * @return string
  *   $text with replacements.
  */
 public static function process($text, $attr = NULL, $ctx = NULL) {
   // NOTE(review): this is a loose comparison, so integer 0 and '' also pick
   // the default; only the string "0" reaches the do-nothing branch below.
   if ($attr == NULL) {
     $attr = self::$smartypantsAttr;
   }

   // Every option starts switched off so that the modes which set only some
   // of them ("-1" and the single-letter flags) never read an undefined
   // variable further down.
   $do_quotes = 0;
   $do_backticks = 0;
   $do_dashes = 0;
   $do_ellipses = 0;
   $do_stupefy = FALSE;
   // Should we translate &quot; entities into normal quotes?
   $convert_quot = 0;

   if ($attr == "0") {
     // Do nothing.
     return $text;
   }
   elseif ($attr == "1") {
     // Do everything, turn all options on.
     $do_quotes = 2;
     $do_backticks = 1;
     $do_dashes = 1;
     $do_ellipses = 1;
   }
   elseif ($attr == "2") {
     // Do everything, turn all options on, use old school dash shorthand.
     $do_quotes = 2;
     $do_backticks = 1;
     $do_dashes = 2;
     $do_ellipses = 1;
   }
   elseif ($attr == "3") {
     // Do everything, turn all options on,
     // use inverted old school dash shorthand.
     $do_quotes = 2;
     $do_backticks = 1;
     $do_dashes = 3;
     $do_ellipses = 1;
   }
   elseif ($attr == "-1") {
     // Special "stupefy" mode.
     $do_stupefy = 1;
   }
   else {
     // Individual flags, one character each; unknown characters are ignored.
     foreach (preg_split('//', $attr) as $c) {
       switch ($c) {
         case 'q':
           $do_quotes = 1;
           break;

         case 'Q':
           $do_quotes = 2;
           break;

         case 'b':
           $do_backticks = 1;
           break;

         case 'B':
           $do_backticks = 2;
           break;

         case 'd':
           $do_dashes = 1;
           break;

         case 'D':
           $do_dashes = 2;
           break;

         case 'i':
           $do_dashes = 3;
           break;

         case 'e':
           $do_ellipses = 1;
           break;

         case 'w':
           $convert_quot = 1;
           break;
       }
     }
   }

   // Localized quotes are used only for level-2 quotes, and only when the
   // caller actually supplied a language; everything else gets English.
   if ($do_quotes == 2 && isset($ctx['langcode'])) {
     $doc_lang = $ctx['langcode'];
   }
   else {
     $doc_lang = 'en';
   }

   $tokens = self::tokenizeHtml($text);
   $result = '';
   // Keep track of when we're inside <pre> or <code> tags.
   $in_pre = 0;

   // This is a cheat, used to get some context for one-character tokens that
   // consist of just a quote char. What we do is remember the last character
   // of the previous text token, to use as context to curl single-character
   // quote tokens correctly.
   $prev_token_last_char = '';

   foreach ($tokens as $cur_token) {
     if ($cur_token[0] == 'tag') {
       // Don't mess with quotes inside tags.
       $result .= $cur_token[1];
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
         $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       elseif (preg_match('/<span .*(xml:)?lang="(..)"/', $cur_token[1], $matches)) {
         // A span with a lang attribute overrides the document language.
         $span_lang = $matches[2];
       }
       elseif ($cur_token[1] == '</span>') {
         unset($span_lang);
       }
       continue;
     }

     $t = $cur_token[1];
     // Remember last char of this token before processing.
     $last_char = mb_substr($t, -1);
     if (!$in_pre) {
       $quotes = self::i18nQuotes(isset($span_lang) ? $span_lang : $doc_lang);

       $t = self::processEscapes($t);

       if ($convert_quot) {
         $t = preg_replace('/&quot;/', '"', $t);
       }

       if ($do_dashes == 1) {
         $t = self::educateDashes($t);
       }
       elseif ($do_dashes == 2) {
         $t = self::educateDashesOldSchool($t);
       }
       elseif ($do_dashes == 3) {
         $t = self::educateDashesOldSchoolInverted($t);
       }

       if ($do_ellipses) {
         $t = self::educateEllipses($t);
       }

       // Note: backticks need to be processed before quotes.
       if ($do_backticks) {
         $t = self::educateBackticks($t);
         if ($do_backticks == 2) {
           $t = self::educateSingleBackticks($t);
         }
       }

       if ($do_quotes) {
         // NOTE(review): double backticks are educated whenever quotes are
         // on, even without a backtick flag; kept as the existing behavior.
         $t = self::educateBackticks($t);
         if ($t == "'") {
           // Special case: single-character ' token.
           $t = preg_match('/\S/', $prev_token_last_char) ? $quotes[3] : $quotes[2];
         }
         elseif ($t == '"') {
           // Special case: single-character " token.
           $t = preg_match('/\S/', $prev_token_last_char) ? $quotes[1] : $quotes[0];
         }
         else {
           // Normal case:
           $t = self::educateQuotes($t, $quotes);
         }
       }

       if ($do_stupefy) {
         $t = self::stupefyEntities($t);
       }
     }
     $prev_token_last_char = $last_char;
     $result .= $t;
   }

   return $result;
 }
 348
 /**
  * Educates straight quotes into English curly-quote entities.
  *
  * Unlike process(), this method always emits the numeric entities
  * &#8220; &#8221; &#8216; &#8217; and does not look at any language.
  *
  * @param string $text
  *   Text to be parsed.
  * @param string|null $attr
  *   NULL selects self::$smartypantsAttr. 0 returns $text unchanged, 2 also
  *   educates ``backticks''-style quotes, any other value educates straight
  *   quotes only.
  * @param mixed $ctx
  *   Not used by this method.
  *
  * @return string
  *   $text with replacements.
  */
 protected static function smartQuotes($text, $attr = NULL, $ctx = NULL) {
   if ($attr == NULL) {
     $attr = self::$smartypantsAttr;
   }

   if ($attr == 0) {
     // Do nothing.
     return $text;
   }

   // Should we educate ``backticks'' -style quotes?
   $do_backticks = ($attr == 2) ? 1 : 0;

   // Opening double, closing double, opening single, closing single. These
   // are handed to educateQuotes(), which requires the replacement marks.
   $quotes = array('&#8220;', '&#8221;', '&#8216;', '&#8217;');

   // Special case to handle quotes at the very end of $text when preceded by
   // an HTML tag. Add a space to give the quote education algorithm a bit of
   // context, so that it can guess correctly that it's a closing quote:
   $add_extra_space = 0;
   if (preg_match("/>['\"]\\z/", $text)) {
     // Remember, so we can trim the extra space later.
     $add_extra_space = 1;
     $text .= " ";
   }

   $tokens = self::tokenizeHtml($text);
   $result = '';
   // Keep track of when we're inside <pre> or <code> tags.
   $in_pre = 0;

   // Last character of the previous text token; gives context for tokens
   // that consist of nothing but a single quote character.
   $prev_token_last_char = "";

   foreach ($tokens as $cur_token) {
     if ($cur_token[0] == "tag") {
       // Don't mess with quotes inside tags.
       $result .= $cur_token[1];
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
         $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       continue;
     }

     $t = $cur_token[1];
     // Remember last char of this token before processing.
     $last_char = mb_substr($t, -1);
     if (!$in_pre) {
       $t = self::processEscapes($t);
       if ($do_backticks) {
         $t = self::educateBackticks($t);
       }

       if ($t == "'") {
         // Special case: single-character ' token.
         $t = preg_match('/\S/', $prev_token_last_char) ? $quotes[3] : $quotes[2];
       }
       elseif ($t == '"') {
         // Special case: single-character " token.
         $t = preg_match('/\S/', $prev_token_last_char) ? $quotes[1] : $quotes[0];
       }
       else {
         // Normal case:
         $t = self::educateQuotes($t, $quotes);
       }
     }
     $prev_token_last_char = $last_char;
     $result .= $t;
   }

   if ($add_extra_space) {
     // Trim trailing space if we added one earlier. The result has to be
     // assigned: preg_replace() does not modify its subject in place.
     $result = preg_replace('/ \z/', '', $result);
   }
   return $result;
 }
 457
 /**
  * Educates dashes in every text token outside the skipped tags.
  *
  * @param string $text
  *   Text to be parsed.
  * @param string|null $attr
  *   NULL selects self::$smartypantsAttr. 0 returns $text unchanged, 2 uses
  *   educateDashesOldSchool(), 3 uses educateDashesOldSchoolInverted(), any
  *   other value uses educateDashes().
  * @param mixed $ctx
  *   Not used by this method.
  *
  * @return string
  *   $text with replacements.
  */
 protected static function smartDashes($text, $attr = NULL, $ctx = NULL) {
   if ($attr == NULL) {
     $attr = self::$smartypantsAttr;
   }
   if ($attr == 0) {
     // Do nothing.
     return $text;
   }

   // Name of the method that performs the dash education.
   if ($attr == 2) {
     // Old smart dash shortcuts, "--" for en, "---" for em.
     $educator = 'educateDashesOldSchool';
   }
   elseif ($attr == 3) {
     // Inverse of 2, "--" for em, "---" for en.
     $educator = 'educateDashesOldSchoolInverted';
   }
   else {
     $educator = 'educateDashes';
   }

   $output = '';
   // Non-zero while we are inside one of the tags we do not touch.
   $skipping = 0;
   foreach (self::tokenizeHtml($text) as $token) {
     $content = $token[1];
     if ($token[0] == "tag") {
       // Tags are copied through verbatim.
       $output .= $content;
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $content, $matches)) {
         $skipping = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       continue;
     }
     if (!$skipping) {
       $content = self::processEscapes($content);
       $content = self::$educator($content);
     }
     $output .= $content;
   }
   return $output;
 }
 518
 /**
  * Educates ellipses in every text token outside the skipped tags.
  *
  * @param string $text
  *   Text to be parsed.
  * @param string|null $attr
  *   NULL selects self::$smartypantsAttr. 0 returns $text unchanged; every
  *   other value enables the transformation.
  * @param mixed $ctx
  *   Not used by this method.
  *
  * @return string
  *   $text with replacements.
  */
 protected static function smartEllipses($text, $attr = NULL, $ctx = NULL) {
   if ($attr == NULL) {
     $attr = self::$smartypantsAttr;
   }
   if ($attr == 0) {
     // Do nothing.
     return $text;
   }

   $output = '';
   // Non-zero while we are inside one of the tags we do not touch.
   $skipping = 0;
   foreach (self::tokenizeHtml($text) as $token) {
     $content = $token[1];
     if ($token[0] == "tag") {
       // Tags are copied through verbatim.
       $output .= $content;
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $content, $matches)) {
         $skipping = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       continue;
     }
     if (!$skipping) {
       $content = self::educateEllipses(self::processEscapes($content));
     }
     $output .= $content;
   }
   return $output;
 }
 567
 /**
  * Replaces '=' between two letters with '&shy;' for manual hyphenation.
  *
  * Only text tokens outside the tags matched by
  * self::SMARTYPANTS_TAGS_TO_SKIP are changed.
  *
  * @param string $text
  *   The string to prettify.
  *
  * @return string
  *   $text with the marked hyphenation points turned into soft hyphens.
  */
 public static function hyphenate($text) {
   $tokens = self::tokenizeHtml($text);

   // Look-arounds keep the neighbouring letters out of the match, so a letter
   // shared by two marks ("ra=di=o=ak=tiv") still counts for both of them.
   $equal_finder = '/(?<=\p{L})=(?=\p{L})/u';
   $result = '';
   // Keep track of when we're inside <pre> or <code> tags.
   $in_pre = 0;
   foreach ($tokens as $cur_token) {
     if ($cur_token[0] == "tag") {
       // Don't mess with anything inside tags.
       $result .= $cur_token[1];
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
         $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       continue;
     }

     $t = $cur_token[1];
     if (!$in_pre) {
       $hyphenated = preg_replace($equal_finder, '&shy;', $t);
       // preg_replace() yields NULL on failure (e.g. malformed UTF-8 with the
       // /u modifier); keep the original text instead of dropping it.
       if ($hyphenated !== NULL) {
         $t = $hyphenated;
       }
     }
     $result .= $t;
   }
   return $result;
 }
 600
 /**
  * Adding space in numbers for easier reading aka digit grouping.
  *
  * Text inside the tags matched by self::SMARTYPANTS_TAGS_TO_SKIP, or inside
  * a span whose class contains "number", "phone" or "ignore", is left alone.
  *
  * @param string $text
  *   The string to prettify.
  * @param int $attr
  *   The kind of space to use: 0 returns $text unchanged, 1 uses
  *   numberNarrownbsp(), 2 numberThinsp(), 3 numberSpan(), 4 numberJustSpan().
  *   Any other value also returns $text unchanged.
  * @param array $ctx
  *   Not used.
  *
  * @return string
  *   $text with the numbers wrapped and grouped.
  */
 public static function smartNumbers($text, $attr = 0, $ctx = NULL) {
   if ($attr == 0) {
     return $text;
   }
   elseif ($attr == 1) {
     $method = 'numberNarrownbsp';
   }
   elseif ($attr == 2) {
     $method = 'numberThinsp';
   }
   elseif ($attr == 3) {
     $method = 'numberSpan';
   }
   elseif ($attr == 4) {
     $method = 'numberJustSpan';
   }
   else {
     // Unknown mode: there is no formatter to call, so change nothing.
     return $text;
   }
   // A real callable for the formatter. Note that "self::$method" without
   // call parentheses would be a static property fetch, not a method.
   $callback = array(__CLASS__, $method);

   // Capture groups: 1 (and 2 nested in it) HTML entities, 3 ISO dates,
   // 4 dd.mm.yyyy dates, 5 digit runs starting with 0, 6 integer part,
   // 7 fraction part. The hyphen sits last in the class of group 5 because
   // PCRE2 rejects "\d-/" as an invalid range.
   $number_finder = '@(?:(&#\d{2,4};|&(#x[0-9a-fA-F]{2,4}|frac\d\d);)|' .
     '(\d{4}-\d\d-\d\d)|(\d\d\.\d\d\.\d{4})|' .
     '(0[ \d/-]+)|([+-]?\d+)([.,]\d+|))@';

   $tokens = self::tokenizeHtml($text);
   $result = '';
   // Keep track of when we're inside <pre> or <code> tags.
   $in_pre = 0;
   // Set while inside a span that opts out of number formatting.
   $span_stop = 0;
   foreach ($tokens as $cur_token) {
     if ($cur_token[0] == "tag") {
       // Don't mess with anything inside tags.
       $result .= $cur_token[1];
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
         $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       elseif (preg_match('/<span .*class="[^"]*\b(number|phone|ignore)\b[^"]*"/', $cur_token[1])) {
         $span_stop = 1;
       }
       elseif ($cur_token[1] == '</span>') {
         $span_stop = 0;
       }
       continue;
     }

     $t = $cur_token[1];
     if (!$in_pre && !$span_stop) {
       $formatted = preg_replace_callback($number_finder, $callback, $t);
       // NULL signals a PCRE failure; keep the original text in that case.
       if ($formatted !== NULL) {
         $t = $formatted;
       }
     }
     $result .= $t;
   }
   return $result;
 }
 662
 /**
  * Internal: wraps and inserts space in numbers.
  *
  * Expects the match array produced by the pattern in smartNumbers():
  * groups 1-5 are things that must not be reformatted (HTML entities, ISO
  * dates, dd.mm.yyyy dates, digit runs starting with 0), group 6 is the
  * integer part of a number and group 7 its fraction part.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  * @param string $thbl
  *   The kind of whitespace to add.
  *
  * @return string
  *   The untouched match for groups 1-5, otherwise the number wrapped in
  *   <span class="number"> with $thbl between groups of three digits.
  */
 protected static function numberReplacer($hit, $thbl) {
   // Entities, dates and 0-prefixed digit runs are returned as they are.
   // Group 2 is nested inside group 1 in the smartNumbers() pattern, so it
   // can only be set together with group 1.
   foreach (array(1, 2, 3, 4, 5) as $group) {
     if ($hit[$group] != '') {
       return $hit[$group];
     }
   }

   // Only integer parts with five or more digits are grouped.
   if (preg_match('/[+-]?\d{5,}/', $hit[6]) == 1) {
     $dec = preg_replace('/[+-]?\d{1,3}(?=(\d{3})+(?!\d))/', '\0' . $thbl, $hit[6]);
   }
   else {
     $dec = $hit[6];
   }
   // Separate groups of three fraction digits, but only where more digits
   // follow, so that no separator is left dangling after the last group.
   $frac = preg_replace('/\d{3}(?=\d)/', '\0' . $thbl, $hit[7]);
   return '<span class="number">' . $dec . $frac . '</span>';
 }
 701
 /**
  * Formats a number match using '&#8239;' as the digit-group separator.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The result of numberReplacer() for this separator.
  */
 protected static function numberNarrownbsp($hit) {
   $separator = '&#8239;';
   return self::numberReplacer($hit, $separator);
 }
 711
 /**
  * Formats a number match using '&#8201;' as the digit-group separator.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The result of numberReplacer() for this separator.
  */
 protected static function numberThinsp($hit) {
   $separator = '&#8201;';
   return self::numberReplacer($hit, $separator);
 }
 721
 /**
  * Formats a number match, separating digit groups with an empty span.
  *
  * The span carries an inline margin-left of 0.167em instead of a space
  * character.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The result of numberReplacer() for this separator.
  */
 protected static function numberSpan($hit) {
   return self::numberReplacer($hit, '<span style="margin-left:0.167em"></span>');
 }
 732
 /**
  * Formats a number match with an empty digit-group separator.
  *
  * The number is still passed through numberReplacer(), but nothing is
  * inserted between the digit groups.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The result of numberReplacer() for an empty separator.
  */
 protected static function numberJustSpan($hit) {
   $separator = '';
   return self::numberReplacer($hit, $separator);
 }
 742
 /**
  * Wrapping abbreviations and adding half space between their parts.
  *
  * Only text tokens outside the tags matched by
  * self::SMARTYPANTS_TAGS_TO_SKIP are changed.
  *
  * @param string $text
  *   The string to prettify.
  * @param int $attr
  *   The kind of space to use: 1 uses abbrNarrownbsp(), 2 abbrThinsp(),
  *   3 abbrSpan(); every other value uses abbrAsis(), which adds no space.
  * @param array $ctx
  *   Not used.
  *
  * @return string
  *   $text with the abbreviations wrapped.
  */
 public static function smartAbbreviation($text, $attr = 0, $ctx = NULL) {
   $replace_method = 'abbrAsis';
   if ($attr == 1) {
     $replace_method = 'abbrNarrownbsp';
   }
   elseif ($attr == 2) {
     $replace_method = 'abbrThinsp';
   }
   elseif ($attr == 3) {
     $replace_method = 'abbrSpan';
   }
   // A real callable for the formatter. Note that "self::$replace_method"
   // without call parentheses would be a static property fetch.
   $callback = array(__CLASS__, $replace_method);

   // Two or more runs of letters that each end in a dot, e.g. "z.B.".
   $abbr_finder = '/(?<=\s|)(\p{L}+\.)(\p{L}+\.)+(?=\s|)/u';

   $tokens = self::tokenizeHtml($text);
   $result = '';
   // Keep track of when we're inside <pre> or <code> tags.
   $in_pre = 0;
   foreach ($tokens as $cur_token) {
     if ($cur_token[0] == "tag") {
       // Don't mess with anything inside tags.
       $result .= $cur_token[1];
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
         $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       continue;
     }

     $t = $cur_token[1];
     if (!$in_pre) {
       $wrapped = preg_replace_callback($abbr_finder, $callback, $t);
       // NULL signals a PCRE failure (e.g. malformed UTF-8 with /u); keep
       // the original text in that case.
       if ($wrapped !== NULL) {
         $t = $wrapped;
       }
     }
     $result .= $t;
   }
   return $result;
 }
 789
 /**
  * Wraps an abbreviation in a span without adding any whitespace.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The whole match inside <span class="abbr">.
  */
 public static function abbrAsis($hit) {
   $abbreviation = $hit[0];
   return "<span class=\"abbr\">{$abbreviation}</span>";
 }
 799
 /**
  * Wraps an abbreviation in a span, adding '&#8201;' after inner dots.
  *
  * The entity is inserted after every dot that is directly followed by a
  * letter.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The spaced match inside <span class="abbr">.
  */
 protected static function abbrThinsp($hit) {
   $spaced = preg_replace('/\.(\p{L})/u', '.&#8201;\1', $hit[0]);
   return sprintf('<span class="abbr">%s</span>', $spaced);
 }
 810
 /**
  * Wraps an abbreviation in a span, adding '&#8239;' after inner dots.
  *
  * The entity is inserted after every dot that is directly followed by a
  * letter.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The spaced match inside <span class="abbr">.
  */
 protected static function abbrNarrownbsp($hit) {
   $spaced = preg_replace('/\.(\p{L})/u', '.&#8239;\1', $hit[0]);
   return sprintf('<span class="abbr">%s</span>', $spaced);
 }
 821
 /**
  * Wraps an abbreviation in a span, spacing inner dots with a margin span.
  *
  * After every dot that is directly followed by a letter, a span with an
  * inline margin-left of 0.167em is inserted; it contains a hidden &nbsp;.
  *
  * @param array $hit
  *   A matcher-array from preg_replace_callback.
  *
  * @return string
  *   The spaced match inside <span class="abbr">.
  */
 protected static function abbrSpan($hit) {
   $replacement = '.<span style="margin-left:0.167em"><span style="display:none">&nbsp;</span></span>' . '\1';
   $spaced = preg_replace('/\.(\p{L})/u', $replacement, $hit[0]);
   return sprintf('<span class="abbr">%s</span>', $spaced);
 }
 833
 /**
  * Wrapping ampersands.
  *
  * Every text token outside the tags matched by
  * self::SMARTYPANTS_TAGS_TO_SKIP is passed through Typogrify::amp().
  *
  * @param string $text
  *   The text to work on.
  *
  * @return string
  *   $text with the text tokens processed.
  */
 public static function smartAmpersand($text) {
   $output = '';
   // Non-zero while we are inside one of the tags we do not touch.
   $skipping = 0;
   foreach (self::tokenizeHtml($text) as $token) {
     $content = $token[1];
     if ($token[0] == "tag") {
       // Tags are copied through verbatim.
       $output .= $content;
       if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $content, $matches)) {
         $skipping = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
       }
       continue;
     }
     $output .= $skipping ? $content : Typogrify::amp($content);
   }
   return $output;
 }
 865
 /**
  * EducatedQuotes.
  *
  * Example input:  "Isn't this fun?"
  * Example output: [0]Isn&#8217;t this fun?[1];
  * where [0] and [1] stand for $quotes[0] and $quotes[1].
  *
  * Apostrophes inside words and in decade abbreviations ('80s) always become
  * &#8217;, whatever $quotes contains.
  *
  * @param string $_
  *   Input text.
  * @param array|null $quotes
  *   Replacement marks in the order: opening double, closing double, opening
  *   single, closing single. NULL (the default) selects the English curly
  *   quotes as numeric entities, so the method also works when it is called
  *   with the text only.
  *
  * @return string
  *   The string, with straight quotes replaced by the given marks.
  */
 protected static function educateQuotes($_, $quotes = NULL) {
   if ($quotes === NULL) {
     $quotes = array('&#8220;', '&#8221;', '&#8216;', '&#8217;');
   }

   // Make our own "punctuation" character class, because the POSIX-style
   // [:PUNCT:] is only available in Perl 5.6 or later:
   $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";

   // Special case if the very first character is a quote
   // followed by punctuation at a non-word-break.
   // Close the quotes by brute force:
   $_ = preg_replace(
     array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
     array($quotes[3], $quotes[1]), $_);

   // Special case for double sets of quotes, e.g.:
   // <p>He said, "'Quoted' words in a larger quote."</p>.
   // A thin space keeps the two adjacent marks apart.
   $spacer = '&#8201;';
   $_ = preg_replace(
     array(
       "/\"'(?=\p{L})/u",
       "/'\"(?=\p{L})/u",
       "/(\p{L})\"'/u",
       "/(\p{L})'\"/u",
     ),
     array(
       $quotes[0] . $spacer . $quotes[2],
       $quotes[2] . $spacer . $quotes[0],
       '\1' . $quotes[1] . $spacer . $quotes[3],
       '\1' . $quotes[3] . $spacer . $quotes[1],
     ), $_);

   // Special case for decade abbreviations (the '80s):
   $_ = preg_replace("/'(?=\\d{2}s)/", '&#8217;', $_);

   // Special case for apostrophes between or after letters.
   $_ = preg_replace("/(\\p{L})(')(?=\\p{L}|$)/u", '\1&#8217;', $_);

   $close_class = '[^\ \t\r\n\[\{\(\-]';
   $dec_dashes = '&\#8211;|&\#8212;';

   // Get most opening single quotes:
   $_ = preg_replace("{
     (
       \\s         |   # a whitespace char, or
       &nbsp;      |   # a non-breaking space entity, or
       --          |   # dashes, or
       &[mn]dash;  |   # named dash entities
       $dec_dashes |   # or decimal entities
       &\\#x201[34];   # or hex
     )
     '                 # the quote
     (?=\\p{L})           # followed by a word character
     }x", '\1' . $quotes[2], $_);
   // Single closing quotes:
   $_ = preg_replace("{
     ($close_class)?
     '
     (?(1)|            # If $1 captured, then do nothing;
       (?=\\s | s\\b)  # otherwise, positive lookahead for a whitespace
     )                 # char or an 's' at a word ending position. This
                       # is a special case to handle something like:
                       # \"<i>Custer</i>'s Last Stand.\"
     }xi", '\1' . $quotes[3], $_);

   // Any remaining single quotes should be opening ones:
   $_ = str_replace("'", $quotes[2], $_);

   // Get most opening double quotes:
   $_ = preg_replace("{
     (
       \\s         |   # a whitespace char, or
       &nbsp;      |   # a non-breaking space entity, or
       --          |   # dashes, or
       &[mn]dash;  |   # named dash entities
       $dec_dashes |   # or decimal entities
       &\\#x201[34];   # or hex
     )
     \"                # the quote
     (?=\\p{L})           # followed by a word character
     }x", '\1' . $quotes[0], $_);

   // Double closing quotes:
   $_ = preg_replace("{
     ($close_class)?
     \"
     (?(1)|(?=\\s))   # If $1 captured, then do nothing;
                      # if not, then make sure the next char is whitespace.
     }x", '\1' . $quotes[1], $_);

   // Any remaining quotes should be opening ones.
   $_ = str_replace('"', $quotes[0], $_);

   return $_;
 }
 970
 971   /**
 972    * Educate backticks.
 973    *
 974    * Example input:  ``Isn't this fun?''
 975    * Example output: &#8220;Isn't this fun?&#8221;.
 976    *
 977    * @param string $_
 978    *   Input text.
 979    *
 980    * @return string
 981    *   The string, with ``backticks'' -style double quotes
 982    *   translated into HTML curly quote entities.
 983    */
 984   protected static function educateBackticks($_) {
 985     $_ = str_replace(array("``", "''", ",,"),
 986                      array('&#8220;', '&#8221;', '&#8222;'), $_);
 987     return $_;
 988   }
 989
 990   /**
 991    * Educate single backticks.
 992    *
 993    * Example input:  `Isn't this fun?'
 994    * Example output: &#8216;Isn&#8217;t this fun?&#8217;
 995    *
 996    * @param string $_
 997    *   Input text.
 998    *
 999    * @return string
1000    *   The string, with `backticks' -style single quotes
1001    *   translated into HTML curly quote entities.
1002    */
1003   protected static function educateSingleBackticks($_) {
1004     $_ = str_replace(array("`", "'"), array('&#8216;', '&#8217;'), $_);
1005     return $_;
1006   }
1007
1008   /**
1009    * Educate dashes.
1010    *
   * Example input:  Wait -- what?
   * Example output: Wait &#8212; what?
1013    *
1014    * @param string $_
1015    *   Input text.
1016    *
1017    * @return string
1018    *   The string, with each instance of "--" translated to
1019    *   an em-dash HTML entity.
1020    */
1021   protected static function educateDashes($_) {
1022     $_ = str_replace('--', '&#8212;', $_);
1023     return $_;
1024   }
1025
1026   /**
1027    * EducateDashesOldSchool.
1028    *
1029    * @param string $_
1030    *   Input text.
1031    *
1032    * @return string
1033    *   The string, with each instance of "--" translated to
1034    *   an en-dash HTML entity, and each "---" translated to
1035    *   an em-dash HTML entity.
1036    */
1037   protected static function educateDashesOldSchool($_) {
1038     $_ = str_replace(array("---", "--"), array('&#8212;', '&#8211;'), $_);
1039     return $_;
1040   }
1041
1042   /**
1043    * EducateDashesOldSchoolInverted.
1044    *
1045    * @param string $_
1046    *   Input text.
1047    *
1048    * @return string
1049    *   The string, with each instance of "--" translated to an em-dash HTML
1050    *   entity, and each "---" translated to an en-dash HTML entity. Two
1051    *   reasons why: First, unlike the en- and em-dash syntax supported by
1052    *   educateDashesOldSchool(), it's compatible with existing entries written
1053    *   before SmartyPants 1.1, back when "--" was only used for em-dashes.
1054    *   Second, em-dashes are more common than en-dashes, and so it sort of makes
1055    *   sense that the shortcut should be shorter to type. (Thanks to Aaron
1056    *   Swartz for the idea.)
1057    */
1058   public static function educateDashesOldSchoolInverted($_) {
1059     // Dashes               en         em.
1060     $_ = str_replace(array("---", "--"), array('&#8211;', '&#8212;'), $_);
1061     return $_;
1062   }
1063
1064   /**
1065    * EducateEllipses.
1066    *
1067    * Example input:  Huh...?
1068    * Example output: Huh&#8230;?
1069    *
1070    * @param string $_
1071    *   Input text.
1072    *
1073    * @return string
1074    *   The string, with each instance of "..." translated to
1075    *   an ellipsis HTML entity. Also converts the case where
1076    *   there are spaces between the dots.
1077    */
1078   protected static function educateEllipses($_) {
1079     $_ = str_replace(array("...", ". . ."), '&#8230;', $_);
1080     return $_;
1081   }
1082
1083   /**
1084    * StupefyEntities.
1085    *
1086    * Example input:  &#8220;Hello &#8212; world.&#8221;
1087    * Example output: "Hello -- world."
1088    *
1089    * @param string $_
1090    *   Input text.
1091    *
1092    * @return string
   *   The string, with each SmartyPants HTML entity translated to
   *   its ASCII counterpart.
1095    */
1096   protected static function stupefyEntities($_) {
1097     // Dashes               en-dash    em-dash.
1098     $_ = str_replace(array('&#8211;', '&#8212;'), array('-', '--'), $_);
1099
1100     // Single quote         open       close.
1101     $_ = str_replace(array('&#8216;', '&#8217;', '&#8218;'), "'", $_);
1102
1103     // Double quote         open       close.
1104     $_ = str_replace(array('&#8220;', '&#8221;', '&#8222;'), '"', $_);
1105
1106     // Ellipsis.
1107     $_ = str_replace('&#8230;', '...', $_);
1108
1109     return $_;
1110   }
1111
1112   /**
1113    * Process escapes.
1114    *
1115    *               Escape  Value
1116    *               ------  -----
1117    *               \\      &#92;
1118    *               \"      &#34;
1119    *               \'      &#39;
1120    *               \.      &#46;
1121    *               \-      &#45;
1122    *               \`      &#96;
1123    *               \,      &#x2c;
1124    *
1125    * @param string $_
1126    *   Input text.
1127    *
1128    * @return string
   *   The string, after processing the backslash escape sequences
   *   listed above. This is useful if you want to force a "dumb"
   *   quote or other character to appear.
1132    */
1133   public static function processEscapes($_) {
1134
1135     $_ = str_replace(
1136       array(
1137         '\\\\',
1138         '\"',
1139         "\'",
1140         '\.',
1141         '\-',
1142         '\`',
1143         '\,',
1144         '<',
1145         '>',
1146         '&quot;',
1147         '&#039;',
1148       ),
1149       array(
1150         '&#92;',
1151         '&#34;',
1152         '&#39;',
1153         '&#46;',
1154         '&#45;',
1155         '&#96;',
1156         '&#x2c;',
1157         '&lt;',
1158         '&gt;',
1159         '"',
1160         '\'',
1161       ),
1162       $_
1163     );
1164
1165     return $_;
1166   }
1167
1168   /**
1169    * Space to nbsp.
1170    *
   * Replaces the space before a "double punctuation mark" (!?:;) with
   * ``&nbsp;``.
   * Especially useful in French.
1174    */
1175   public static function spaceToNbsp($text) {
1176     $tokens;
1177     $tokens = self::tokenizeHtml($text);
1178
1179     $result = '';
1180     // Keep track of when we're inside <pre> or <code> tags.
1181     $in_pre = 0;
1182     foreach ($tokens as $cur_token) {
1183       if ($cur_token[0] == "tag") {
1184         // Don't mess with quotes inside tags.
1185         $result .= $cur_token[1];
1186         if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
1187           $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
1188         }
1189       }
1190       else {
1191         $t = $cur_token[1];
1192         if (!$in_pre) {
1193           $t = preg_replace("/\s([\!\?\:;])/", '&nbsp;$1', $t);
1194         }
1195         $result .= $t;
1196       }
1197     }
1198     return $result;
1199   }
1200
1201   /**
1202    * Space hyphens.
1203    *
1204    * Replaces a normal dash with em-dash between whitespaces.
1205    */
1206   public static function spaceHyphens($text) {
1207     $tokens;
1208     $tokens = self::tokenizeHtml($text);
1209
1210     $result = '';
1211     // Keep track of when we're inside <pre> or <code> tags.
1212     $in_pre = 0;
1213     foreach ($tokens as $cur_token) {
1214       if ($cur_token[0] == "tag") {
1215         // Don't mess with quotes inside tags.
1216         $result .= $cur_token[1];
1217         if (preg_match(self::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
1218           $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
1219         }
1220       }
1221       else {
1222         $t = $cur_token[1];
1223         if (!$in_pre) {
1224           $t = preg_replace("/\s(-{1,3})\s/", '&#8239;—&thinsp;', $t);
1225         }
1226         $result .= $t;
1227       }
1228     }
1229     return $result;
1230   }
1231
1232   /**
1233    * Fallback Tokenizer if Markdown not present.
1234    *
1235    * @param string $str
1236    *   String containing HTML markup.
1237    *
   * @return array
   *   An array of the tokens comprising the input string. Each token is either
   *   a tag (possibly with nested tags contained therein, such as
   *   <a href="<MTFoo>" />), or a run of text between tags. Each element of the
   *   array is a two-element array; the first is either 'tag' or 'text'; the
   *   second is the actual value.
1244    *
1245    * Regular expression derived from the _tokenize() subroutine in
1246    * Brad Choate's MTRegex plugin.
1247    * <http://www.bradchoate.com/past/mtregex.php>
1248    */
1249   public static function tokenizeHtml($str) {
1250
1251     $index = 0;
1252     $tokens = array();
1253
1254     // Comment
1255     // Processing instruction
1256     // Regular tags.
1257     $match = '(?s:<!(?:--.*?--\s*)+>)|' .
1258       '(?s:<\?.*?\?>)|' .
1259       '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
1260
1261     $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
1262
1263     foreach ($parts as $part) {
1264       if (++$index % 2 && $part != '') {
1265         $tokens[] = array('text', $part);
1266       }
1267       else {
1268         $tokens[] = array('tag', $part);
1269       }
1270     }
1271     return $tokens;
1272   }
1273
1274 }
1275
1276 /*
1277 Copyright and License
1278 ---------------------
1279
1280 Copyright (c) 2003 John Gruber
1281 <http://daringfireball.net/>
1282 All rights reserved.
1283
1284 Copyright (c) 2004-2005 Michel Fortin
1285 <http://www.michelf.com>
1286
1287 Redistribution and use in source and binary forms, with or without
1288 modification, are permitted provided that the following conditions are met:
1289
1290 *  Redistributions of source code must retain the above copyright
1291    notice, this list of conditions and the following disclaimer.
1292
1293 *  Redistributions in binary form must reproduce the above copyright
1294    notice, this list of conditions and the following disclaimer in the
1295    documentation and/or other materials provided with the distribution.
1296
1297 *  Neither the name "SmartyPants" nor the names of its contributors may
1298    be used to endorse or promote products derived from this software
1299    without specific prior written permission.
1300
1301 This software is provided by the copyright holders and contributors "as is"
1302 and any express or implied warranties, including, but not limited to, the
1303 implied warranties of merchantability and fitness for a particular purpose
1304 are disclaimed. In no event shall the copyright owner or contributors be
1305 liable for any direct, indirect, incidental, special, exemplary, or
1306 consequential damages (including, but not limited to, procurement of
1307 substitute goods or services; loss of use, data, or profits; or business
1308 interruption) however caused and on any theory of liability, whether in
1309 contract, strict liability, or tort (including negligence or otherwise)
1310 arising in any way out of the use of this software, even if advised of the
1311 possibility of such damage.
1312 */