', so they can be
* styled with CSS. Ampersands are also normalized to '&amp;'. Requires
* ampersands to have whitespace or an '&nbsp;' on both sides.
*
* It won't mess up & that are already wrapped, in entities or URLs
*
* @param string
*
* @return string
*/
public static function amp($text) {
  // An ampersand only qualifies when it has whitespace on both sides, where
  // "whitespace" also covers the &nbsp; entity and a literal UTF-8 no-break
  // space. That requirement is what keeps ampersands inside entities, URLs
  // and already-wrapped spans untouched.
  $space = '\s|&nbsp;|\xC2\xA0';
  // Accepted spellings of the ampersand itself: named entity, numeric
  // entities, and the bare character.
  $ampersand = '&amp;|&\#38;|&\#038;|&';
  $amp_finder = '/(' . $space . ')(' . $ampersand . ')(' . $space . ')/';
  // Groups 1 and 3 (the surrounding space) are kept as found; the ampersand
  // is re-emitted in one normalized form inside a span that CSS can target.
  return preg_replace($amp_finder, '\\1<span class="amp">&amp;</span>\\3', $text);
}
/**
* Puts a thin space (``&thinsp;``) before and after an ``&ndash;`` or ``&mdash;``.
*
* Dashes may have whitespace or an ``&nbsp;`` on both sides.
*
* @param string
*
* @return string
*/
public static function dash($text) {
  // Anything that counts as space around a dash: ASCII whitespace, the
  // &nbsp; / &thinsp; entities, and the literal UTF-8 no-break space
  // (C2 A0) and thin space (E2 80 89). Written as byte escapes so the
  // pattern does not depend on invisible characters in this source file.
  $space = '\s|&nbsp;|&thinsp;|\xC2\xA0|\xE2\x80\x89';
  // Em and en dashes as named entities, numeric entities (hex and decimal),
  // or the literal UTF-8 characters (E2 80 94 and E2 80 93).
  $dash = '&mdash;|&ndash;|&\#x2013;|&\#8211;|&\#x2014;|&\#8212;|\xE2\x80\x94|\xE2\x80\x93';
  $dash_finder = '/(' . $space . ')*(' . $dash . ')(' . $space . ')*/';
  // Whatever spacing surrounded the dash (including none) is replaced by
  // exactly one thin space on each side; group 2 is the dash as found.
  // Because &thinsp; is itself matched as space, re-running is a no-op.
  return preg_replace($dash_finder, '&thinsp;\\2&thinsp;', $text);
}
/**
* Helper method for caps method - used for preg_replace_callback.
*/
public static function _cap_wrapper($matchobj) {
  // $matchobj is the match array handed over by preg_replace_callback():
  // index 2 holds a plain run of capitals/digits, index 3 holds dotted
  // initials, and index 4 holds whatever single character (if any) was
  // consumed right after the initials.
  if (!empty($matchobj[2])) {
    return sprintf('<span class="caps">%s</span>', $matchobj[2]);
  }

  $initials = $matchobj[3];
  // Group 4 can be absent from the array when nothing was captured for it.
  $after = isset($matchobj[4]) ? $matchobj[4] : '';

  // substr() replaces the curly-brace string offset ($s{n}), which was
  // removed from the language in PHP 8.0.
  if (substr($initials, -1) === ' ') {
    // The initials pattern may swallow one trailing space. Move it outside
    // the span so the styled element ends at the final dot, and keep the
    // character captured after it instead of discarding it.
    $caps = substr($initials, 0, -1);
    $tail = ' ' . $after;
  }
  else {
    $caps = $initials;
    $tail = $after;
  }
  return sprintf('<span class="caps">%s</span>%s', $caps, $tail);
}
/**
* Stylable capitals.
*
* Wraps multiple capital letters in ``<span class="caps">``
* so they can be styled with CSS.
*
* Uses the smartypants tokenizer to not screw with HTML or with tags it
* shouldn't.
*/
public static function caps($text) {
  // Each token is read as array(type, text); only non-tag tokens that are
  // not inside a skipped tag are run through the capitals pattern.
  $tokens = SmartyPants::tokenizeHtml($text);

  // NOTE(review): every character class below opens with "[[", so a literal
  // "[" (and "=" in the first class) is accepted alongside capitals and
  // digits. Left as found; confirm whether that is intended.
  $cap_finder = "/(
    (\b[[\p{Lu}=\d]*            # Group 2: Any amount of caps and digits
    [[\p{Lu}][[\p{Lu}\d]*       # A cap string much at least include two caps (but they can have digits between them)
    (?:&amp;|&)?                # allowing ampersand in caps.
    [[\p{Lu}'\d]*[[\p{Lu}\d])   # Any amount of caps and digits
    | (\b[[\p{Lu}]+\.\s?        # OR: Group 3: Some caps, followed by a '.' and an optional space
    (?:[[\p{Lu}]+\.\s?)+)       # Followed by the same thing at least once more
    (\s|\b|$|[)}\]>]))/xu";

  // Array callable instead of the 'self::method' string form, which is
  // deprecated as of PHP 8.2.
  $wrapper = array(__CLASS__, '_cap_wrapper');

  $result = array();
  $in_skipped_tag = FALSE;
  foreach ($tokens as $token) {
    if ($token[0] == 'tag') {
      // Tags pass through untouched. Every tag re-evaluates the skip state.
      // NOTE(review): if the skip pattern also matches closing tags, the
      // text after such a closing tag is skipped too; verify against the
      // SmartyPants constant.
      $result[] = $token[1];
      $in_skipped_tag = (bool) preg_match(SmartyPants::SMARTYPANTS_TAGS_TO_SKIP, $token[1]);
      continue;
    }

    if ($in_skipped_tag) {
      $result[] = $token[1];
      continue;
    }

    // preg_replace_callback() returns NULL on a PCRE error (for instance
    // invalid UTF-8 under the /u modifier). Fall back to the untouched text
    // so the content is never silently deleted.
    $wrapped = preg_replace_callback($cap_finder, $wrapper, $token[1]);
    $result[] = ($wrapped === NULL) ? $token[1] : $wrapped;
  }
  return implode('', $result);
}
/**
* Helper method for initial_quotes method - used for preg_replace_callback.
*/
public static function _quote_wrapper($matchobj) {
  // $matchobj is the match array from preg_replace_callback(): index 1 is
  // everything that precedes the quote (block tag, inline tags, white
  // space), index 7 is a double quote and index 8 a single quote.
  if (!empty($matchobj[7])) {
    $classname = 'dquo';
    $quote = $matchobj[7];
  }
  else {
    $classname = 'quo';
    $quote = $matchobj[8];
  }
  // One placeholder per argument: the prefix is emitted unchanged and the
  // quote is wrapped in a span carrying the class. The earlier two-slot
  // format printed the class name as text and dropped the quote.
  return sprintf('%s<span class="%s">%s</span>', $matchobj[1], $classname, $quote);
}
/**
* initial_quotes.
*
* Wraps initial quotes in ``class="dquo"`` for double quotes or
* ``class="quo"`` for single quotes. Works in these block tags ``(h1-h6, p, li)``
* and also accounts for potential opening inline elements ``a, em, strong, span, b, i``
* Optionally applies the quote span tags to guillemets as well.
*/
public static function initial_quotes($text, $do_guillemets = false) {
  // Spellings of an opening double quote: straight quote, the curly quote
  // as literal UTF-8 bytes (E2 80 9C), and its named and numeric entities.
  // "#" is escaped because the pattern runs in extended (x) mode.
  $double = '"|\xE2\x80\x9C|&ldquo;|&\#8220;';
  if ($do_guillemets) {
    // Left guillemet as literal UTF-8 bytes (C2 AB) and as entities.
    // NOTE(review): \xAE is kept from the previous pattern, but in Latin-1
    // that byte is the registered sign; the left guillemet is \xAB. Confirm.
    $double .= '|\xAE|\xC2\xAB|&laquo;|&\#171;';
  }
  // Spellings of an opening single quote, same scheme (E2 80 98).
  $single = '\'|\xE2\x80\x98|&lsquo;|&\#8216;';

  // The pattern is built once; the guillemet switch only widens the list
  // of double-quote spellings, so the group numbers stay the same.
  $quote_finder = '/((<(p|h[1-6]|li)[^>]*>|^)   # start with an opening p, h1-6, li or the start of the string
    \s*                                          # optional white space
    (<(a|em|span|strong|i|b)[^>]*>\s*)*)         # optional opening inline tags, each with optional white space
    ((' . $double . ')|(' . $single . '))        # the quote: doubles are group 7, singles are group 8
    /ix';

  // Array callable instead of the 'self::method' string form, which is
  // deprecated as of PHP 8.2.
  return preg_replace_callback($quote_finder, array(__CLASS__, '_quote_wrapper'), $text);
}
/**
* widont.
*
* Replaces the space between the last two words in a string with ``&nbsp;``.
* Works in these block tags ``(h1-h6, p, li, dt, dd)`` and also accounts for
* potential closing inline elements ``a, em, strong, span, b, i``.
*
* An empty string is returned unchanged rather than raising an error.
*/
public static function widont($text) {
  // This regex is a beast, tread lightly. Group 1 is the second-to-last
  // word (or a closing span), group 2 the white space between the last two
  // words, group 3 the last word plus any closing tags up to the end of the
  // block or string.
  $widont_finder = "/([^<>\s]+|<\/span>)               # ensure more than 1 word
    (\s+)                                              # the space to replace
    ([^<>\s]+                                          # must be flollowed by non-tag non-space characters
    \s*                                                # optional white space!
    (<\/(a|em|span|strong|i|b)[^>]*>\s*)*              # optional closing inline tags with optional white space after each
    ((<\/(p|h[1-6]|li|dt|dd)>)|$))                     # end with a closing p, h1-6, li or the end of the string
    /x";
  // The separator is spelled as an explicit entity so the no-break space
  // cannot be lost or confused with an ordinary space in this source file.
  return preg_replace($widont_finder, '$1&nbsp;$3', $text);
}
/**
* typogrify.
*
* The super typography filter.
* Applies the following filters, in this order: amp, widont, smartypants,
* caps, initial_quotes, dash.
* Optionally applies the quote span tags to guillemets as well.
*/
public static function filter($text, $do_guillemets = FALSE) {
  // Ampersands and widow control run on the raw markup, before SmartyPants
  // rewrites the punctuation.
  $educated = SmartyPants::process(self::widont(self::amp($text)));
  // Capitals and opening quotes are wrapped on the SmartyPants output; the
  // guillemet switch is only relevant to the quote step.
  $wrapped = self::initial_quotes(self::caps($educated), $do_guillemets);
  // Dash spacing is applied last.
  return self::dash($wrapped);
}
}