--- /dev/null
+<?php
+
+namespace Drupal\typogrify;
+
+/**
+ * Provides ASCII-to-Unicode conversion maps and applies them to HTML text.
+ */
+class UnicodeConversion {
+
+  /**
+   * Provides Unicode-mapping.
+   *
+   * @param string $type
+   *   The map type we're looking for, one of 'ligature', 'punctuation',
+   *   'fraction', 'quotes', 'arrow', 'nested' or 'all'.
+   *
+   * @return array
+   *   Array of conversions, keyed by the original string. For 'nested', the
+   *   full map keyed by map type, each value being such a conversion array.
+   *   For 'all', every group merged into one flat array. An unrecognised
+   *   type yields an empty array.
+   */
+  public static function map($type = 'all') {
+    // Within each group, longer sequences are listed before the shorter
+    // sequences they contain (e.g. '...' before '..', '->>' before '->').
+    $map = array(
+      // See http://www.unicode.org/charts/PDF/UFB00.pdf .
+      'ligature' => array(
+        'ffi' => 'ffi',
+        'ffl' => 'ffl',
+        'ff' => 'ff',
+        'fi' => 'fi',
+        'fl' => 'fl',
+        'ij' => 'ij',
+        'IJ' => 'IJ',
+        'st' => 'st',
+        'ss' => 'ß',
+      ),
+      // See http://www.unicode.org/charts/PDF/U2000.pdf .
+      'punctuation' => array(
+        '...' => '…',
+        '..' => '‥',
+        '. . .' => '…',
+        '---' => '—',
+        '--' => '–',
+      ),
+      'fraction' => array(
+        '1/4' => '¼',
+        '1/2' => '½',
+        '3/4' => '¾',
+        '1/3' => '⅓',
+        '2/3' => '⅔',
+        '1/5' => '⅕',
+        '2/5' => '⅖',
+        '3/5' => '⅗',
+        '4/5' => '⅘',
+        '1/6' => '⅙',
+        '5/6' => '⅚',
+        '1/8' => '⅛',
+        '3/8' => '⅜',
+        '5/8' => '⅝',
+        '7/8' => '⅞',
+      ),
+      'quotes' => array(
+        ',,' => '„',
+        "''" => '”',
+        '<<' => '«',
+        '>>' => '»',
+      ),
+      // See http://www.unicode.org/charts/PDF/U2190.pdf .
+      'arrow' => array(
+        '->>' => '↠',
+        '<<-' => '↞',
+        '->|' => '⇥',
+        '|<-' => '⇤',
+        '<->' => '↔',
+        '->' => '→',
+        '<-' => '←',
+        '<=>' => '⇔',
+        '=>' => '⇒',
+        '<=' => '⇐',
+      ),
+    );
+
+    if ($type === 'all') {
+      // Arrows are merged ahead of quotes so that '<<-' precedes '<<' in the
+      // flattened map.
+      return array_merge($map['ligature'], $map['arrow'], $map['punctuation'], $map['quotes'], $map['fraction']);
+    }
+    if ($type === 'nested') {
+      return $map;
+    }
+    // Unknown (or non-string) types yield an empty map instead of an
+    // undefined-index notice and a NULL return value.
+    return (is_string($type) && isset($map[$type])) ? $map[$type] : array();
+  }
+
+  /**
+   * Perform character conversion.
+   *
+   * The text is tokenized with SmartyPants::tokenizeHtml(). Tag tokens are
+   * copied through untouched, and text following a tag that matches
+   * SmartyPants::SMARTYPANTS_TAGS_TO_SKIP is left alone until the matching
+   * closing tag is seen. All other text is passed through
+   * SmartyPants::processEscapes() and then has the requested ASCII sequences
+   * replaced by their Unicode equivalents from self::map().
+   *
+   * @param string $text
+   *   Text to be parsed.
+   * @param array $characters_to_convert
+   *   Array of ASCII strings to convert; each should be a key of the array
+   *   returned by self::map(). Entries that are not known to the map are
+   *   ignored. Replacements are applied in the order given.
+   *
+   * @return string
+   *   The result of the conversion. The input is returned unchanged when
+   *   $characters_to_convert is empty or not an array.
+   */
+  public static function convertCharacters($text, $characters_to_convert) {
+    if (empty($characters_to_convert) || !is_array($characters_to_convert)) {
+      // Do nothing.
+      return $text;
+    }
+
+    // Get ascii to unicode mappings.
+    $unicode_map = self::map();
+
+    // Build parallel search/replace lists. Unknown strings are skipped: an
+    // entry without a replacement would make str_replace() delete that
+    // string from the text instead of leaving it alone.
+    $ascii_strings = array();
+    $unicode_strings = array();
+    foreach ($characters_to_convert as $ascii_string) {
+      if (is_string($ascii_string) && isset($unicode_map[$ascii_string])) {
+        $ascii_strings[] = $ascii_string;
+        $unicode_strings[] = $unicode_map[$ascii_string];
+      }
+    }
+
+    $tokens = SmartyPants::tokenizeHtml($text);
+    $result = '';
+    // Keep track of when we're inside <pre> or <code> tags.
+    $in_pre = 0;
+    foreach ($tokens as $cur_token) {
+      if ($cur_token[0] == "tag") {
+        // Don't mess with text inside tags, <pre> blocks, or <code> blocks.
+        $result .= $cur_token[1];
+        // Get the tags to skip regex from SmartyPants. A captured '/' in the
+        // first group marks a closing tag, which ends the skipped region.
+        if (preg_match(SmartyPants::SMARTYPANTS_TAGS_TO_SKIP, $cur_token[1], $matches)) {
+          $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
+        }
+      }
+      else {
+        $t = $cur_token[1];
+        if ($in_pre == 0) {
+          $t = SmartyPants::processEscapes($t);
+          $t = str_replace($ascii_strings, $unicode_strings, $t);
+        }
+        $result .= $t;
+      }
+    }
+    return $result;
+  }
+
+}