c3eaf446444fd7df8b8cb024dfe3d8311bd598c9
[yaffs-website] / vendor / michelf / php-markdown / Michelf / Markdown.php
1 <?php
2 /**
3  * Markdown  -  A text-to-HTML conversion tool for web writers
4  *
5  * @package   php-markdown
6  * @author    Michel Fortin <michel.fortin@michelf.com>
7  * @copyright 2004-2016 Michel Fortin <https://michelf.com/projects/php-markdown/>
8  * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9  */
10
11 namespace Michelf;
12
13 /**
14  * Markdown Parser Class
15  */
16 class Markdown implements MarkdownInterface {
17         /**
18          * Define the package version
19          * @var string
20          */
21         const MARKDOWNLIB_VERSION = "1.7.0";
22
23         /**
24          * Simple function interface - Initialize the parser and return the result
25          * of its transform method. This will work fine for derived classes too.
26          *
27          * @api
28          *
29          * @param  string $text
30          * @return string
31          */
32         public static function defaultTransform($text) {
33                 // Take parser class on which this function was called.
34                 $parser_class = \get_called_class();
35
36                 // Try to take parser from the static parser list
37                 static $parser_list;
38                 $parser =& $parser_list[$parser_class];
39
40                 // Create the parser it not already set
41                 if (!$parser) {
42                         $parser = new $parser_class;
43                 }
44
45                 // Transform text using parser.
46                 return $parser->transform($text);
47         }
48
49         /**
50          * Configuration variables
51          */
52
        /**
         * Suffix used to close empty elements such as <hr> and <br>.
         * Change to ">" for HTML output.
         * @var string
         */
        public $empty_element_suffix = " />";

        /**
         * The width of indentation of the output markup.
         * Block-level patterns also allow up to ($tab_width - 1) leading
         * spaces before a construct.
         * @var int
         */
        public $tab_width = 4;
        
        /**
         * Change to `true` to disallow markup.
         * When set, hashHTMLBlocks() returns its input untouched.
         * @var boolean
         */
        public $no_markup   = false;
        /**
         * Change to `true` to disallow entities.
         * @var boolean
         */
        public $no_entities = false;
        

        /**
         * Change to `true` to enable line breaks on \n without two trailing spaces
         * @var boolean
         */
        public $hard_wrap = false;

        /**
         * Predefined URLs for reference links and images, keyed by link id.
         * Copied into the working URL table at the start of each transform.
         * @var array
         */
        public $predef_urls   = array();
        /**
         * Predefined titles for reference links and images, keyed by link id.
         * Copied into the working title table at the start of each transform.
         * @var array
         */
        public $predef_titles = array();

        /**
         * Optional filter function for URLs
         * @var callable
         */
        public $url_filter_func = null;

        /**
         * Optional header id="" generation callback function.
         * @var callable
         */
        public $header_id_func = null;
        
        /**
         * Optional function for converting code block content to HTML
         * @var callable
         */
        public $code_block_content_func = null;

        /**
         * Optional function for converting code span content to HTML.
         * @var callable
         */
        public $code_span_content_func = null;

        /**
         * Class attribute to toggle "enhanced ordered list" behaviour
         * setting this to true will allow ordered lists to start from the index
         * number that is defined first.
         * 
         * For example:
         * 2. List item two
         * 3. List item three
         *
         * Becomes:
         * <ol start="2">
         * <li>List item two</li>
         * <li>List item three</li>
         * </ol>
         *
         * @var bool
         */
        public $enhanced_ordered_list = false;
128
129         /**
130          * Parser implementation
131          */
132
        /**
         * Maximum nesting depth supported when matching balanced [brackets].
         * Needed to insert a maximum bracket depth while converting to PHP.
         * @var int
         */
        protected $nested_brackets_depth = 6;
        /**
         * Regex fragment matching balanced [brackets]; built by the constructor
         * from $nested_brackets_depth.
         * @var string
         */
        protected $nested_brackets_re;

        /**
         * Maximum nesting depth supported for parentheses inside a URL.
         * @var int
         */
        protected $nested_url_parenthesis_depth = 4;
        /**
         * Regex fragment matching a URL with balanced parentheses; built by the
         * constructor from $nested_url_parenthesis_depth.
         * @var string
         */
        protected $nested_url_parenthesis_re;

        /**
         * Table of hash values for escaped characters: the characters listed
         * here are quoted into the $escape_chars_re character class by the
         * constructor.
         * @var string
         */
        protected $escape_chars = '\`*_{}[]()>#+-.!';
        /**
         * Character-class regex built from $escape_chars by the constructor.
         * @var string
         */
        protected $escape_chars_re;
150
151         /**
152          * Constructor function. Initialize appropriate member variables.
153          * @return void
154          */
155         public function __construct() {
156                 $this->_initDetab();
157                 $this->prepareItalicsAndBold();
158         
159                 $this->nested_brackets_re = 
160                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
161                         str_repeat('\])*', $this->nested_brackets_depth);
162         
163                 $this->nested_url_parenthesis_re = 
164                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
165                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
166                 
167                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
168                 
169                 // Sort document, block, and span gamut in ascendent priority order.
170                 asort($this->document_gamut);
171                 asort($this->block_gamut);
172                 asort($this->span_gamut);
173         }
174
175
        /**
         * Internal hashes used during transformation. All three are
         * (re)initialised by setup() and emptied again by teardown().
         *
         * $urls and $titles are keyed by lower-cased link id; $html_hashes maps
         * each hash token to the text it stands for.
         * @var array
         */
        protected $urls        = array();
        protected $titles      = array();
        protected $html_hashes = array();
        
        /**
         * Status flag to avoid invalid nesting. It is true while doAnchors() is
         * running, which makes any nested doAnchors() call return its input
         * unchanged.
         * @var boolean
         */
        protected $in_anchor = false;
189         
190         /**
191          * Called before the transformation process starts to setup parser states.
192          * @return void
193          */
194         protected function setup() {
195                 // Clear global hashes.
196                 $this->urls        = $this->predef_urls;
197                 $this->titles      = $this->predef_titles;
198                 $this->html_hashes = array();
199                 $this->in_anchor   = false;
200         }
201         
202         /**
         * Called after the transformation process to clear any variable which may
         * be taking up memory unnecessarily.
205          * @return void
206          */
207         protected function teardown() {
208                 $this->urls        = array();
209                 $this->titles      = array();
210                 $this->html_hashes = array();
211         }
212
213         /**
         * Main function. Performs some preprocessing on the input text and passes
         * it through the document gamut.
216          *
217          * @api
218          *
219          * @param  string $text
220          * @return string
221          */
222         public function transform($text) {
223                 $this->setup();
224         
225                 # Remove UTF-8 BOM and marker character in input, if present.
226                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
227
228                 # Standardize line endings:
229                 #   DOS to Unix and Mac to Unix
230                 $text = preg_replace('{\r\n?}', "\n", $text);
231
232                 # Make sure $text ends with a couple of newlines:
233                 $text .= "\n\n";
234
235                 # Convert all tabs to spaces.
236                 $text = $this->detab($text);
237
238                 # Turn block-level HTML blocks into hash entries
239                 $text = $this->hashHTMLBlocks($text);
240
241                 # Strip any lines consisting only of spaces and tabs.
242                 # This makes subsequent regexen easier to write, because we can
243                 # match consecutive blank lines with /\n+/ instead of something
244                 # contorted like /[ ]*\n+/ .
245                 $text = preg_replace('/^[ ]+$/m', '', $text);
246
247                 # Run document gamut methods.
248                 foreach ($this->document_gamut as $method => $priority) {
249                         $text = $this->$method($text);
250                 }
251                 
252                 $this->teardown();
253
254                 return $text . "\n";
255         }
256
        /**
         * Define the document gamut: method name => priority.
         * The constructor sorts this table by ascending priority and
         * transform() calls each method in that order on the whole document.
         * @var array
         */
        protected $document_gamut = array(
                // Strip link definitions, store in hashes.
                "stripLinkDefinitions" => 20,
                "runBasicBlockGamut"   => 30,
        );
266
267         /**
268          * Strips link definitions from text, stores the URLs and titles in
269          * hash references
270          * @param  string $text
271          * @return string
272          */
273         protected function stripLinkDefinitions($text) {
274         
275                 $less_than_tab = $this->tab_width - 1;
276
277                 // Link defs are in the form: ^[id]: url "optional title"
278                 $text = preg_replace_callback('{
279                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
280                                                           [ ]*
281                                                           \n?                           # maybe *one* newline
282                                                           [ ]*
283                                                         (?:
284                                                           <(.+?)>                       # url = $2
285                                                         |
286                                                           (\S+?)                        # url = $3
287                                                         )
288                                                           [ ]*
289                                                           \n?                           # maybe one newline
290                                                           [ ]*
291                                                         (?:
292                                                                 (?<=\s)                 # lookbehind for whitespace
293                                                                 ["(]
294                                                                 (.*?)                   # title = $4
295                                                                 [")]
296                                                                 [ ]*
297                                                         )?      # title is optional
298                                                         (?:\n+|\Z)
299                         }xm',
300                         array($this, '_stripLinkDefinitions_callback'),
301                         $text
302                 );
303                 return $text;
304         }
305
306         /**
307          * The callback to strip link definitions
308          * @param  array $matches
309          * @return string
310          */
311         protected function _stripLinkDefinitions_callback($matches) {
312                 $link_id = strtolower($matches[1]);
313                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
314                 $this->urls[$link_id] = $url;
315                 $this->titles[$link_id] =& $matches[4];
316                 return ''; // String that will replace the block
317         }
318
319         /**
320          * Hashify HTML blocks
321          * @param  string $text
322          * @return string
323          */
        protected function hashHTMLBlocks($text) {
                // When markup is disallowed, nothing is hashed here and the text
                // is returned untouched.
                if ($this->no_markup) {
                        return $text;
                }

                // Blocks may be indented by up to one column less than a tab.
                $less_than_tab = $this->tab_width - 1;

                /**
                 * Hashify HTML blocks:
                 *
                 * We only want to do this for block-level HTML tags, such as headers,
                 * lists, and tables. That's because we still want to wrap <p>s around
                 * "paragraphs" that are wrapped in non-block-level tags, such as
                 * anchors, phrase emphasis, and spans. The list of tags we're looking
                 * for is hard-coded:
                 *
                 * *  List "a" is made of tags which can be both inline or block-level.
                 *    These will be treated block-level when the start tag is alone on 
                 *    its line, otherwise they're not matched here and will be taken as 
                 *    inline later.
                 * *  List "b" is made of tags which are always block-level;
                 */
                $block_tags_a_re = 'ins|del';
                $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                                                   'script|noscript|style|form|fieldset|iframe|math|svg|'.
                                                   'article|section|nav|aside|hgroup|header|footer|'.
                                                   'figure';

                // Regular expression for the content of a block tag.
                // $nested_tags_level is how many times the nested-tag fragment
                // below is repeated, i.e. how deep same-name nesting can be matched.
                $nested_tags_level = 4;
                $attr = '
                        (?>                             # optional tag attributes
                          \s                    # starts with whitespace
                          (?>
                                [^>"/]+         # text outside quotes
                          |
                                /+(?!>)         # slash not followed by ">"
                          |
                                "[^"]*"         # text inside double quotes (tolerate ">")
                          |
                                \'[^\']*\'      # text inside single quotes (tolerate ">")
                          )*
                        )?      
                        ';
                // $content is assembled as N opening fragments, a lazy ".*?" for
                // the innermost level, then N closing fragments. Every fragment
                // refers to the tag name through backreference \2.
                $content =
                        str_repeat('
                                (?>
                                  [^<]+                 # content without tag
                                |
                                  <\2                   # nested opening tag
                                        '.$attr.'       # attributes
                                        (?>
                                          />
                                        |
                                          >', $nested_tags_level).      // end of opening tag
                                          '.*?'.                                        // last level nested tag content
                        str_repeat('
                                          </\2\s*>      # closing nested tag
                                        )
                                  |                             
                                        <(?!/\2\s*>     # other tags with a different name
                                  )
                                )*',
                                $nested_tags_level);
                // Same fragment for the list "a" alternative below, where the tag
                // name is captured in group 3 instead of group 2.
                $content2 = str_replace('\2', '\3', $content);

                /**
                 * First, look for nested blocks, e.g.:
                 *      <div>
                 *              <div>
                 *              tags for inner block must be indented.
                 *              </div>
                 *      </div>
                 *
                 * The outermost tags must start at the left margin for this to match,
                 * and the inner nested divs must be indented.
                 * We need to do this before the next, more liberal match, because the
                 * next match will start at the first `<div>` and stop at the
                 * first `</div>`.
                 */
                // Every match is replaced by the callback's return value; the
                // callback receives the whole block in $matches[1].
                $text = preg_replace_callback('{(?>
                        (?>
                                (?<=\n)                 # Starting on its own line
                                |                               # or
                                \A\n?                   # the at beginning of the doc
                        )
                        (                                               # save in $1

                          # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
                          # in between.
                                        
                                                [ ]{0,'.$less_than_tab.'}
                                                <('.$block_tags_b_re.')# start tag = $2
                                                '.$attr.'>                      # attributes followed by > and \n
                                                '.$content.'            # content, support nesting
                                                </\2>                           # the matching end tag
                                                [ ]*                            # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document

                        | # Special version for tags of group a.

                                                [ ]{0,'.$less_than_tab.'}
                                                <('.$block_tags_a_re.')# start tag = $3
                                                '.$attr.'>[ ]*\n        # attributes followed by >
                                                '.$content2.'           # content, support nesting
                                                </\3>                           # the matching end tag
                                                [ ]*                            # trailing spaces/tabs
                                                (?=\n+|\Z)      # followed by a newline or end of document
                                        
                        | # Special case just for <hr />. It was easier to make a special 
                          # case than to make the other regex more complicated.
                        
                                                [ ]{0,'.$less_than_tab.'}
                                                <(hr)                           # start tag = $2
                                                '.$attr.'                       # attributes
                                                /?>                                     # the matching end tag
                                                [ ]*
                                                (?=\n{2,}|\Z)           # followed by a blank line or end of document
                        
                        | # Special case for standalone HTML comments:
                        
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <!-- .*? -->
                                        )
                                        [ ]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                        
                        | # PHP and ASP-style processor instructions (<? and <%)
                        
                                        [ ]{0,'.$less_than_tab.'}
                                        (?s:
                                                <([?%])                 # $2
                                                .*?
                                                \2>
                                        )
                                        [ ]*
                                        (?=\n{2,}|\Z)           # followed by a blank line or end of document
                                        
                        )
                        )}Sxmi',
                        array($this, '_hashHTMLBlocks_callback'),
                        $text
                );

                return $text;
        }
471
472         /**
473          * The callback for hashing HTML blocks
         * @param  array $matches
475          * @return string
476          */
477         protected function _hashHTMLBlocks_callback($matches) {
478                 $text = $matches[1];
479                 $key  = $this->hashBlock($text);
480                 return "\n\n$key\n\n";
481         }
482         
483         /**
         * Called whenever a tag must be hashed when a function inserts an atomic
         * element in the text stream. Passing $text through this function gives
         * a unique text-token which will be reverted back when calling unhash.
         *
         * The $boundary argument specifies what character should be used to surround
         * the token. By convention, "B" is used for block elements that need not
         * be wrapped into paragraph tags at the end, ":" is used for elements
         * that are word separators and "X" is used in the general case.
492          *
493          * @param  string $text
494          * @param  string $boundary
495          * @return string
496          */
497         protected function hashPart($text, $boundary = 'X') {
498                 // Swap back any tag hash found in $text so we do not have to `unhash`
499                 // multiple times at the end.
500                 $text = $this->unhash($text);
501                 
502                 // Then hash the block.
503                 static $i = 0;
504                 $key = "$boundary\x1A" . ++$i . $boundary;
505                 $this->html_hashes[$key] = $text;
506                 return $key; // String that will replace the tag.
507         }
508
509         /**
510          * Shortcut function for hashPart with block-level boundaries.
511          * @param  string $text
512          * @return string
513          */
514         protected function hashBlock($text) {
515                 return $this->hashPart($text, 'B');
516         }
517
        /**
         * Define the block gamut - these are all the transformations that form
         * block-level tags like paragraphs, headers, and list items.
         * Keys are method names and values are priorities; the constructor sorts
         * the table by ascending priority and runBasicBlockGamut() calls the
         * methods in that order.
         * @var array
         */
        protected $block_gamut = array(
                "doHeaders"         => 10,
                "doHorizontalRules" => 20,
                "doLists"           => 40,
                "doCodeBlocks"      => 50,
                "doBlockQuotes"     => 60,
        );
530
531         /**
         * Run block gamut transformations.
         *
         * We need to escape raw HTML in Markdown source before doing anything
         * else. This needs to be done for each block, and not only at the
         * beginning in the Markdown function, since hashed blocks can be part of
         * list items and could have been indented. Indented blocks would have
         * been seen as a code block in a previous pass of hashHTMLBlocks.
539          *
540          * @param  string $text
541          * @return string
542          */
543         protected function runBlockGamut($text) {
544                 $text = $this->hashHTMLBlocks($text);
545                 return $this->runBasicBlockGamut($text);
546         }
547
548         /**
         * Run block gamut transformations, without hashing HTML blocks. This is
550          * useful when HTML blocks are known to be already hashed, like in the first
551          * whole-document pass.
552          *
553          * @param  string $text
554          * @return string
555          */
556         protected function runBasicBlockGamut($text) {
557         
558                 foreach ($this->block_gamut as $method => $priority) {
559                         $text = $this->$method($text);
560                 }
561                 
562                 // Finally form paragraph and restore hashed blocks.
563                 $text = $this->formParagraphs($text);
564
565                 return $text;
566         }
567
568         /**
569          * Convert horizontal rules
570          * @param  string $text
571          * @return string
572          */
573         protected function doHorizontalRules($text) {
574                 return preg_replace(
575                         '{
576                                 ^[ ]{0,3}       # Leading space
577                                 ([-*_])         # $1: First marker
578                                 (?>                     # Repeated marker group
579                                         [ ]{0,2}        # Zero, one, or two spaces.
580                                         \1                      # Marker character
581                                 ){2,}           # Group repeated at least twice
582                                 [ ]*            # Tailing spaces
583                                 $                       # End of line.
584                         }mx',
585                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
586                         $text
587                 );
588         }
589
        /**
         * These are all the transformations that occur *within* block-level
         * tags like paragraphs, headers, and list items.
         * Keys are method names and values are priorities; the constructor sorts
         * the table by ascending priority and runSpanGamut() calls the methods
         * in that order.
         * @var array
         */
        protected $span_gamut = array(
                // Process character escapes, code spans, and inline HTML
                // in one shot.
                "parseSpan"           => -30,
                // Process anchor and image tags. Images must come first,
                // because ![foo][f] looks like an anchor.
                "doImages"            =>  10,
                "doAnchors"           =>  20,
                // Make links out of things like `<https://example.com/>`
                // Must come after doAnchors, because you can use < and >
                // delimiters in inline links like [this](<url>).
                "doAutoLinks"         =>  30,
                "encodeAmpsAndAngles" =>  40,
                "doItalicsAndBold"    =>  50,
                "doHardBreaks"        =>  60,
        );
611
612         /**
613          * Run span gamut transformations
614          * @param  string $text
615          * @return string
616          */
617         protected function runSpanGamut($text) {
618                 foreach ($this->span_gamut as $method => $priority) {
619                         $text = $this->$method($text);
620                 }
621
622                 return $text;
623         }
624
625         /**
626          * Do hard breaks
627          * @param  string $text
628          * @return string
629          */
630         protected function doHardBreaks($text) {
631                 if ($this->hard_wrap) {
632                         return preg_replace_callback('/ *\n/', 
633                                 array($this, '_doHardBreaks_callback'), $text);
634                 } else {
635                         return preg_replace_callback('/ {2,}\n/', 
636                                 array($this, '_doHardBreaks_callback'), $text);
637                 }
638         }
639
640         /**
641          * Trigger part hashing for the hard break (callback method)
642          * @param  array $matches
643          * @return string
644          */
645         protected function _doHardBreaks_callback($matches) {
646                 return $this->hashPart("<br$this->empty_element_suffix\n");
647         }
648
649         /**
650          * Turn Markdown link shortcuts into XHTML <a> tags.
651          * @param  string $text
652          * @return string
653          */
654         protected function doAnchors($text) {
655                 if ($this->in_anchor) {
656                         return $text;
657                 }
658                 $this->in_anchor = true;
659                 
660                 // First, handle reference-style links: [link text] [id]
661                 $text = preg_replace_callback('{
662                         (                                       # wrap whole match in $1
663                           \[
664                                 ('.$this->nested_brackets_re.') # link text = $2
665                           \]
666
667                           [ ]?                          # one optional space
668                           (?:\n[ ]*)?           # one optional newline followed by spaces
669
670                           \[
671                                 (.*?)           # id = $3
672                           \]
673                         )
674                         }xs',
675                         array($this, '_doAnchors_reference_callback'), $text);
676
677                 // Next, inline-style links: [link text](url "optional title")
678                 $text = preg_replace_callback('{
679                         (                               # wrap whole match in $1
680                           \[
681                                 ('.$this->nested_brackets_re.') # link text = $2
682                           \]
683                           \(                    # literal paren
684                                 [ \n]*
685                                 (?:
686                                         <(.+?)> # href = $3
687                                 |
688                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
689                                 )
690                                 [ \n]*
691                                 (                       # $5
692                                   ([\'"])       # quote char = $6
693                                   (.*?)         # Title = $7
694                                   \6            # matching quote
695                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
696                                 )?                      # title is optional
697                           \)
698                         )
699                         }xs',
700                         array($this, '_doAnchors_inline_callback'), $text);
701
702                 // Last, handle reference-style shortcuts: [link text]
703                 // These must come last in case you've also got [link text][1]
704                 // or [link text](/foo)
705                 $text = preg_replace_callback('{
706                         (                                       # wrap whole match in $1
707                           \[
708                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
709                           \]
710                         )
711                         }xs',
712                         array($this, '_doAnchors_reference_callback'), $text);
713
714                 $this->in_anchor = false;
715                 return $text;
716         }
717
718         /**
719          * Callback method to parse referenced anchors
720          * @param  string $matches
721          * @return string
722          */
723         protected function _doAnchors_reference_callback($matches) {
724                 $whole_match =  $matches[1];
725                 $link_text   =  $matches[2];
726                 $link_id     =& $matches[3];
727
728                 if ($link_id == "") {
729                         // for shortcut links like [this][] or [this].
730                         $link_id = $link_text;
731                 }
732                 
733                 // lower-case and turn embedded newlines into spaces
734                 $link_id = strtolower($link_id);
735                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
736
737                 if (isset($this->urls[$link_id])) {
738                         $url = $this->urls[$link_id];
739                         $url = $this->encodeURLAttribute($url);
740                         
741                         $result = "<a href=\"$url\"";
742                         if ( isset( $this->titles[$link_id] ) ) {
743                                 $title = $this->titles[$link_id];
744                                 $title = $this->encodeAttribute($title);
745                                 $result .=  " title=\"$title\"";
746                         }
747                 
748                         $link_text = $this->runSpanGamut($link_text);
749                         $result .= ">$link_text</a>";
750                         $result = $this->hashPart($result);
751                 } else {
752                         $result = $whole_match;
753                 }
754                 return $result;
755         }
756
757         /**
758          * Callback method to parse inline anchors
759          * @param  string $matches
760          * @return string
761          */
762         protected function _doAnchors_inline_callback($matches) {
763                 $whole_match    =  $matches[1];
764                 $link_text              =  $this->runSpanGamut($matches[2]);
765                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
766                 $title                  =& $matches[7];
767
768                 // If the URL was of the form <s p a c e s> it got caught by the HTML
769                 // tag parser and hashed. Need to reverse the process before using
770                 // the URL.
771                 $unhashed = $this->unhash($url);
772                 if ($unhashed != $url)
773                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
774
775                 $url = $this->encodeURLAttribute($url);
776
777                 $result = "<a href=\"$url\"";
778                 if (isset($title)) {
779                         $title = $this->encodeAttribute($title);
780                         $result .=  " title=\"$title\"";
781                 }
782                 
783                 $link_text = $this->runSpanGamut($link_text);
784                 $result .= ">$link_text</a>";
785
786                 return $this->hashPart($result);
787         }
788
789         /**
790          * Turn Markdown image shortcuts into <img> tags.
791          * @param  string $text
792          * @return string
793          */
794         protected function doImages($text) {
795                 // First, handle reference-style labeled images: ![alt text][id]
796                 $text = preg_replace_callback('{
797                         (                               # wrap whole match in $1
798                           !\[
799                                 ('.$this->nested_brackets_re.')         # alt text = $2
800                           \]
801
802                           [ ]?                          # one optional space
803                           (?:\n[ ]*)?           # one optional newline followed by spaces
804
805                           \[
806                                 (.*?)           # id = $3
807                           \]
808
809                         )
810                         }xs', 
811                         array($this, '_doImages_reference_callback'), $text);
812
813                 // Next, handle inline images:  ![alt text](url "optional title")
814                 // Don't forget: encode * and _
815                 $text = preg_replace_callback('{
816                         (                               # wrap whole match in $1
817                           !\[
818                                 ('.$this->nested_brackets_re.')         # alt text = $2
819                           \]
820                           \s?                   # One optional whitespace character
821                           \(                    # literal paren
822                                 [ \n]*
823                                 (?:
824                                         <(\S*)> # src url = $3
825                                 |
826                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
827                                 )
828                                 [ \n]*
829                                 (                       # $5
830                                   ([\'"])       # quote char = $6
831                                   (.*?)         # title = $7
832                                   \6            # matching quote
833                                   [ \n]*
834                                 )?                      # title is optional
835                           \)
836                         )
837                         }xs',
838                         array($this, '_doImages_inline_callback'), $text);
839
840                 return $text;
841         }
842
843         /**
844          * Callback to parse references image tags
845          * @param  array $matches
846          * @return string
847          */
848         protected function _doImages_reference_callback($matches) {
849                 $whole_match = $matches[1];
850                 $alt_text    = $matches[2];
851                 $link_id     = strtolower($matches[3]);
852
853                 if ($link_id == "") {
854                         $link_id = strtolower($alt_text); // for shortcut links like ![this][].
855                 }
856
857                 $alt_text = $this->encodeAttribute($alt_text);
858                 if (isset($this->urls[$link_id])) {
859                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
860                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
861                         if (isset($this->titles[$link_id])) {
862                                 $title = $this->titles[$link_id];
863                                 $title = $this->encodeAttribute($title);
864                                 $result .=  " title=\"$title\"";
865                         }
866                         $result .= $this->empty_element_suffix;
867                         $result = $this->hashPart($result);
868                 } else {
869                         // If there's no such link ID, leave intact:
870                         $result = $whole_match;
871                 }
872
873                 return $result;
874         }
875
876         /**
877          * Callback to parse inline image tags
878          * @param  array $matches
879          * @return string
880          */
881         protected function _doImages_inline_callback($matches) {
882                 $whole_match    = $matches[1];
883                 $alt_text               = $matches[2];
884                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
885                 $title                  =& $matches[7];
886
887                 $alt_text = $this->encodeAttribute($alt_text);
888                 $url = $this->encodeURLAttribute($url);
889                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
890                 if (isset($title)) {
891                         $title = $this->encodeAttribute($title);
892                         $result .=  " title=\"$title\""; // $title already quoted
893                 }
894                 $result .= $this->empty_element_suffix;
895
896                 return $this->hashPart($result);
897         }
898
899         /**
900          * Parse Markdown heading elements to HTML
901          * @param  string $text
902          * @return string
903          */
904         protected function doHeaders($text) {
905                 /**
906                  * Setext-style headers:
907                  *        Header 1
908                  *        ========
909                  *  
910                  *        Header 2
911                  *        --------
912                  */
913                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
914                         array($this, '_doHeaders_callback_setext'), $text);
915
916                 /**
917                  * atx-style headers:
918                  *   # Header 1
919                  *   ## Header 2
920                  *   ## Header 2 with closing hashes ##
921                  *   ...
922                  *   ###### Header 6
923                  */
924                 $text = preg_replace_callback('{
925                                 ^(\#{1,6})      # $1 = string of #\'s
926                                 [ ]*
927                                 (.+?)           # $2 = Header text
928                                 [ ]*
929                                 \#*                     # optional closing #\'s (not counted)
930                                 \n+
931                         }xm',
932                         array($this, '_doHeaders_callback_atx'), $text);
933
934                 return $text;
935         }
936
937         /**
938          * Setext header parsing callback
939          * @param  array $matches
940          * @return string
941          */
942         protected function _doHeaders_callback_setext($matches) {
943                 // Terrible hack to check we haven't found an empty list item.
944                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) {
945                         return $matches[0];
946                 }
947                 
948                 $level = $matches[2]{0} == '=' ? 1 : 2;
949
950                 // ID attribute generation
951                 $idAtt = $this->_generateIdFromHeaderValue($matches[1]);
952
953                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[1])."</h$level>";
954                 return "\n" . $this->hashBlock($block) . "\n\n";
955         }
956
957         /**
958          * ATX header parsing callback
959          * @param  array $matches
960          * @return string
961          */
962         protected function _doHeaders_callback_atx($matches) {
963                 // ID attribute generation
964                 $idAtt = $this->_generateIdFromHeaderValue($matches[2]);
965
966                 $level = strlen($matches[1]);
967                 $block = "<h$level$idAtt>".$this->runSpanGamut($matches[2])."</h$level>";
968                 return "\n" . $this->hashBlock($block) . "\n\n";
969         }
970
971         /**
972          * If a header_id_func property is set, we can use it to automatically
973          * generate an id attribute.
974          *
975          * This method returns a string in the form id="foo", or an empty string
976          * otherwise.
977          * @param  string $headerValue
978          * @return string
979          */
980         protected function _generateIdFromHeaderValue($headerValue) {           
981                 if (!is_callable($this->header_id_func)) {
982                         return "";
983                 }
984
985                 $idValue = call_user_func($this->header_id_func, $headerValue);
986                 if (!$idValue) {
987                         return "";
988                 }
989
990                 return ' id="' . $this->encodeAttribute($idValue) . '"';
991         }
992
993         /**
994          * Form HTML ordered (numbered) and unordered (bulleted) lists.
995          * @param  string $text
996          * @return string
997          */
998         protected function doLists($text) {
999                 $less_than_tab = $this->tab_width - 1;
1000
1001                 // Re-usable patterns to match list item bullets and number markers:
1002                 $marker_ul_re  = '[*+-]';
1003                 $marker_ol_re  = '\d+[\.]';
1004
1005                 $markers_relist = array(
1006                         $marker_ul_re => $marker_ol_re,
1007                         $marker_ol_re => $marker_ul_re,
1008                         );
1009
1010                 foreach ($markers_relist as $marker_re => $other_marker_re) {
1011                         // Re-usable pattern to match any entirel ul or ol list:
1012                         $whole_list_re = '
1013                                 (                                                               # $1 = whole list
1014                                   (                                                             # $2
1015                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
1016                                         ('.$marker_re.')                        # $4 = first list item marker
1017                                         [ ]+
1018                                   )
1019                                   (?s:.+?)
1020                                   (                                                             # $5
1021                                           \z
1022                                         |
1023                                           \n{2,}
1024                                           (?=\S)
1025                                           (?!                                           # Negative lookahead for another list item marker
1026                                                 [ ]*
1027                                                 '.$marker_re.'[ ]+
1028                                           )
1029                                         |
1030                                           (?=                                           # Lookahead for another kind of list
1031                                             \n
1032                                                 \3                                              # Must have the same indentation
1033                                                 '.$other_marker_re.'[ ]+
1034                                           )
1035                                   )
1036                                 )
1037                         '; // mx
1038                         
1039                         // We use a different prefix before nested lists than top-level lists.
1040                         //See extended comment in _ProcessListItems().
1041                 
1042                         if ($this->list_level) {
1043                                 $text = preg_replace_callback('{
1044                                                 ^
1045                                                 '.$whole_list_re.'
1046                                         }mx',
1047                                         array($this, '_doLists_callback'), $text);
1048                         } else {
1049                                 $text = preg_replace_callback('{
1050                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
1051                                                 '.$whole_list_re.'
1052                                         }mx',
1053                                         array($this, '_doLists_callback'), $text);
1054                         }
1055                 }
1056
1057                 return $text;
1058         }
1059
1060         /**
1061          * List parsing callback
1062          * @param  array $matches
1063          * @return string
1064          */
1065         protected function _doLists_callback($matches) {
1066                 // Re-usable patterns to match list item bullets and number markers:
1067                 $marker_ul_re  = '[*+-]';
1068                 $marker_ol_re  = '\d+[\.]';
1069                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
1070                 $marker_ol_start_re = '[0-9]+';
1071
1072                 $list = $matches[1];
1073                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
1074
1075                 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
1076
1077                 $list .= "\n";
1078                 $result = $this->processListItems($list, $marker_any_re);
1079
1080                 $ol_start = 1;
1081                 if ($this->enhanced_ordered_list) {
1082                         // Get the start number for ordered list.
1083                         if ($list_type == 'ol') {
1084                                 $ol_start_array = array();
1085                                 $ol_start_check = preg_match("/$marker_ol_start_re/", $matches[4], $ol_start_array);
1086                                 if ($ol_start_check){
1087                                         $ol_start = $ol_start_array[0];
1088                                 }
1089                         }
1090                 }
1091
1092                 if ($ol_start > 1 && $list_type == 'ol'){
1093                         $result = $this->hashBlock("<$list_type start=\"$ol_start\">\n" . $result . "</$list_type>");
1094                 } else {
1095                         $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
1096                 }
1097                 return "\n". $result ."\n\n";
1098         }
1099
        /**
         * Nesting tracker for list levels.
         *
         * processListItems() increments it before splitting a list into items
         * and decrements it afterwards; doLists() uses a different list-start
         * pattern while it is non-zero. Zero means no list is being processed.
         * @var integer
         */
        protected $list_level = 0;
1105
1106         /**
1107          * Process the contents of a single ordered or unordered list, splitting it
1108          * into individual list items.
1109          * @param  string $list_str
1110          * @param  string $marker_any_re
1111          * @return string
1112          */
1113         protected function processListItems($list_str, $marker_any_re) {
1114                 /**
1115                  * The $this->list_level global keeps track of when we're inside a list.
1116                  * Each time we enter a list, we increment it; when we leave a list,
1117                  * we decrement. If it's zero, we're not in a list anymore.
1118                  *
1119                  * We do this because when we're not inside a list, we want to treat
1120                  * something like this:
1121                  *
1122                  *              I recommend upgrading to version
1123                  *              8. Oops, now this line is treated
1124                  *              as a sub-list.
1125                  *
1126                  * As a single paragraph, despite the fact that the second line starts
1127                  * with a digit-period-space sequence.
1128                  *
1129                  * Whereas when we're inside a list (or sub-list), that line will be
1130                  * treated as the start of a sub-list. What a kludge, huh? This is
1131                  * an aspect of Markdown's syntax that's hard to parse perfectly
1132                  * without resorting to mind-reading. Perhaps the solution is to
1133                  * change the syntax rules such that sub-lists must start with a
1134                  * starting cardinal number; e.g. "1." or "a.".
1135                  */             
1136                 $this->list_level++;
1137
1138                 // Trim trailing blank lines:
1139                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
1140
1141                 $list_str = preg_replace_callback('{
1142                         (\n)?                                                   # leading line = $1
1143                         (^[ ]*)                                                 # leading whitespace = $2
1144                         ('.$marker_any_re.'                             # list marker and space = $3
1145                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
1146                         )
1147                         ((?s:.*?))                                              # list item text   = $4
1148                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
1149                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
1150                         }xm',
1151                         array($this, '_processListItems_callback'), $list_str);
1152
1153                 $this->list_level--;
1154                 return $list_str;
1155         }
1156
1157         /**
1158          * List item parsing callback
1159          * @param  array $matches
1160          * @return string
1161          */
1162         protected function _processListItems_callback($matches) {
1163                 $item = $matches[4];
1164                 $leading_line =& $matches[1];
1165                 $leading_space =& $matches[2];
1166                 $marker_space = $matches[3];
1167                 $tailing_blank_line =& $matches[5];
1168
1169                 if ($leading_line || $tailing_blank_line || 
1170                         preg_match('/\n{2,}/', $item))
1171                 {
1172                         // Replace marker with the appropriate whitespace indentation
1173                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
1174                         $item = $this->runBlockGamut($this->outdent($item)."\n");
1175                 } else {
1176                         // Recursion for sub-lists:
1177                         $item = $this->doLists($this->outdent($item));
1178                         $item = $this->formParagraphs($item, false);
1179                 }
1180
1181                 return "<li>" . $item . "</li>\n";
1182         }
1183
1184         /**
1185          * Process Markdown `<pre><code>` blocks.
1186          * @param  string $text
1187          * @return string
1188          */
1189         protected function doCodeBlocks($text) {
1190                 $text = preg_replace_callback('{
1191                                 (?:\n\n|\A\n?)
1192                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
1193                                   (?>
1194                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
1195                                         .*\n+
1196                                   )+
1197                                 )
1198                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1199                         }xm',
1200                         array($this, '_doCodeBlocks_callback'), $text);
1201
1202                 return $text;
1203         }
1204
1205         /**
1206          * Code block parsing callback
1207          * @param  array $matches
1208          * @return string
1209          */
1210         protected function _doCodeBlocks_callback($matches) {
1211                 $codeblock = $matches[1];
1212
1213                 $codeblock = $this->outdent($codeblock);
1214                 if ($this->code_block_content_func) {
1215                         $codeblock = call_user_func($this->code_block_content_func, $codeblock, "");
1216                 } else {
1217                         $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
1218                 }
1219
1220                 # trim leading newlines and trailing newlines
1221                 $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
1222
1223                 $codeblock = "<pre><code>$codeblock\n</code></pre>";
1224                 return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
1225         }
1226
1227         /**
1228          * Create a code span markup for $code. Called from handleSpanToken.
1229          * @param  string $code
1230          * @return string
1231          */
1232         protected function makeCodeSpan($code) {
1233                 if ($this->code_span_content_func) {
1234                         $code = call_user_func($this->code_span_content_func, $code);
1235                 } else {
1236                         $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
1237                 }
1238                 return $this->hashPart("<code>$code</code>");
1239         }
1240
        /**
         * Define the emphasis operators with their regex matches.
         *
         * Keys are the values the $em state variable takes in
         * doItalicsAndBold(): '' (initial value, no emphasis open), '*' or '_'.
         * The '' pattern matches a lone * or _ that is not followed by
         * whitespace (optionally preceded by one of . , : ;); the '*' and '_'
         * patterns match that character when it is not preceded by whitespace
         * or by itself and not followed by itself.
         * @var array
         */
        protected $em_relist = array(
                ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?![\.,:;]?\s)',
                '*' => '(?<![\s*])\*(?!\*)',
                '_' => '(?<![\s_])_(?!_)',
        );
1250
        /**
         * Define the strong operators with their regex matches.
         *
         * Same layout as $em_relist, for the two-character markers: keys are
         * the values the $strong state variable takes in doItalicsAndBold()
         * ('' initially, '**' or '__').
         * @var array
         */
        protected $strong_relist = array(
                ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?![\.,:;]?\s)',
                '**' => '(?<![\s*])\*\*(?!\*)',
                '__' => '(?<![\s_])__(?!_)',
        );
1260
        /**
         * Define the emphasis + strong operators with their regex matches.
         *
         * Same layout as $em_relist, for the three-character markers. Keys
         * are looked up by prepareItalicsAndBold() using the concatenation of
         * an $em_relist key and a $strong_relist key ('', '***' or '___').
         * @var array
         */
        protected $em_strong_relist = array(
                ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?![\.,:;]?\s)',
                '***' => '(?<![\s*])\*\*\*(?!\*)',
                '___' => '(?<![\s_])___(?!_)',
        );
1270
        /**
         * Container for prepared regular expressions.
         *
         * Filled by prepareItalicsAndBold(): one complete, delimited pattern
         * per "$em$strong" key combination, read by doItalicsAndBold().
         * @var array
         */
        protected $em_strong_prepared_relist;
1276         
1277         /**
1278          * Prepare regular expressions for searching emphasis tokens in any
1279          * context.
1280          * @return void
1281          */
1282         protected function prepareItalicsAndBold() {
1283                 foreach ($this->em_relist as $em => $em_re) {
1284                         foreach ($this->strong_relist as $strong => $strong_re) {
1285                                 // Construct list of allowed token expressions.
1286                                 $token_relist = array();
1287                                 if (isset($this->em_strong_relist["$em$strong"])) {
1288                                         $token_relist[] = $this->em_strong_relist["$em$strong"];
1289                                 }
1290                                 $token_relist[] = $em_re;
1291                                 $token_relist[] = $strong_re;
1292                                 
1293                                 // Construct master expression from list.
1294                                 $token_re = '{(' . implode('|', $token_relist) . ')}';
1295                                 $this->em_strong_prepared_relist["$em$strong"] = $token_re;
1296                         }
1297                 }
1298         }
1299
1300         /**
1301          * Convert Markdown italics (emphasis) and bold (strong) to HTML
1302          * @param  string $text
1303          * @return string
1304          */
1305         protected function doItalicsAndBold($text) {
1306                 $token_stack = array('');
1307                 $text_stack = array('');
1308                 $em = '';
1309                 $strong = '';
1310                 $tree_char_em = false;
1311                 
1312                 while (1) {
1313                         // Get prepared regular expression for seraching emphasis tokens
1314                         // in current context.
1315                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
1316                         
1317                         // Each loop iteration search for the next emphasis token. 
1318                         // Each token is then passed to handleSpanToken.
1319                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
1320                         $text_stack[0] .= $parts[0];
1321                         $token =& $parts[1];
1322                         $text =& $parts[2];
1323                         
1324                         if (empty($token)) {
1325                                 // Reached end of text span: empty stack without emitting.
1326                                 // any more emphasis.
1327                                 while ($token_stack[0]) {
1328                                         $text_stack[1] .= array_shift($token_stack);
1329                                         $text_stack[0] .= array_shift($text_stack);
1330                                 }
1331                                 break;
1332                         }
1333                         
1334                         $token_len = strlen($token);
1335                         if ($tree_char_em) {
1336                                 // Reached closing marker while inside a three-char emphasis.
1337                                 if ($token_len == 3) {
1338                                         // Three-char closing marker, close em and strong.
1339                                         array_shift($token_stack);
1340                                         $span = array_shift($text_stack);
1341                                         $span = $this->runSpanGamut($span);
1342                                         $span = "<strong><em>$span</em></strong>";
1343                                         $text_stack[0] .= $this->hashPart($span);
1344                                         $em = '';
1345                                         $strong = '';
1346                                 } else {
1347                                         // Other closing marker: close one em or strong and
1348                                         // change current token state to match the other
1349                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
1350                                         $tag = $token_len == 2 ? "strong" : "em";
1351                                         $span = $text_stack[0];
1352                                         $span = $this->runSpanGamut($span);
1353                                         $span = "<$tag>$span</$tag>";
1354                                         $text_stack[0] = $this->hashPart($span);
1355                                         $$tag = ''; // $$tag stands for $em or $strong
1356                                 }
1357                                 $tree_char_em = false;
1358                         } else if ($token_len == 3) {
1359                                 if ($em) {
1360                                         // Reached closing marker for both em and strong.
1361                                         // Closing strong marker:
1362                                         for ($i = 0; $i < 2; ++$i) {
1363                                                 $shifted_token = array_shift($token_stack);
1364                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
1365                                                 $span = array_shift($text_stack);
1366                                                 $span = $this->runSpanGamut($span);
1367                                                 $span = "<$tag>$span</$tag>";
1368                                                 $text_stack[0] .= $this->hashPart($span);
1369                                                 $$tag = ''; // $$tag stands for $em or $strong
1370                                         }
1371                                 } else {
1372                                         // Reached opening three-char emphasis marker. Push on token 
1373                                         // stack; will be handled by the special condition above.
1374                                         $em = $token{0};
1375                                         $strong = "$em$em";
1376                                         array_unshift($token_stack, $token);
1377                                         array_unshift($text_stack, '');
1378                                         $tree_char_em = true;
1379                                 }
1380                         } else if ($token_len == 2) {
1381                                 if ($strong) {
1382                                         // Unwind any dangling emphasis marker:
1383                                         if (strlen($token_stack[0]) == 1) {
1384                                                 $text_stack[1] .= array_shift($token_stack);
1385                                                 $text_stack[0] .= array_shift($text_stack);
1386                                         }
1387                                         // Closing strong marker:
1388                                         array_shift($token_stack);
1389                                         $span = array_shift($text_stack);
1390                                         $span = $this->runSpanGamut($span);
1391                                         $span = "<strong>$span</strong>";
1392                                         $text_stack[0] .= $this->hashPart($span);
1393                                         $strong = '';
1394                                 } else {
1395                                         array_unshift($token_stack, $token);
1396                                         array_unshift($text_stack, '');
1397                                         $strong = $token;
1398                                 }
1399                         } else {
1400                                 // Here $token_len == 1
1401                                 if ($em) {
1402                                         if (strlen($token_stack[0]) == 1) {
1403                                                 // Closing emphasis marker:
1404                                                 array_shift($token_stack);
1405                                                 $span = array_shift($text_stack);
1406                                                 $span = $this->runSpanGamut($span);
1407                                                 $span = "<em>$span</em>";
1408                                                 $text_stack[0] .= $this->hashPart($span);
1409                                                 $em = '';
1410                                         } else {
1411                                                 $text_stack[0] .= $token;
1412                                         }
1413                                 } else {
1414                                         array_unshift($token_stack, $token);
1415                                         array_unshift($text_stack, '');
1416                                         $em = $token;
1417                                 }
1418                         }
1419                 }
1420                 return $text_stack[0];
1421         }
1422
1423         /**
1424          * Parse Markdown blockquotes to HTML
1425          * @param  string $text
1426          * @return string
1427          */
1428         protected function doBlockQuotes($text) {
1429                 $text = preg_replace_callback('/
1430                           (                                                             # Wrap whole match in $1
1431                                 (?>
1432                                   ^[ ]*>[ ]?                    # ">" at the start of a line
1433                                         .+\n                                    # rest of the first line
1434                                   (.+\n)*                                       # subsequent consecutive lines
1435                                   \n*                                           # blanks
1436                                 )+
1437                           )
1438                         /xm',
1439                         array($this, '_doBlockQuotes_callback'), $text);
1440
1441                 return $text;
1442         }
1443
1444         /**
1445          * Blockquote parsing callback
1446          * @param  array $matches
1447          * @return string
1448          */
1449         protected function _doBlockQuotes_callback($matches) {
1450                 $bq = $matches[1];
1451                 // trim one level of quoting - trim whitespace-only lines
1452                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
1453                 $bq = $this->runBlockGamut($bq); // recurse
1454
1455                 $bq = preg_replace('/^/m', "  ", $bq);
1456                 // These leading spaces cause problem with <pre> content, 
1457                 // so we need to fix that:
1458                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
1459                         array($this, '_doBlockQuotes_callback2'), $bq);
1460
1461                 return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
1462         }
1463
1464         /**
1465          * Blockquote parsing callback
1466          * @param  array $matches
1467          * @return string
1468          */
1469         protected function _doBlockQuotes_callback2($matches) {
1470                 $pre = $matches[1];
1471                 $pre = preg_replace('/^  /m', '', $pre);
1472                 return $pre;
1473         }
1474
1475         /**
1476          * Parse paragraphs
1477          *
1478          * @param  string $text String to process in paragraphs
1479          * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
1480          * @return string
1481          */
1482         protected function formParagraphs($text, $wrap_in_p = true) {
1483                 // Strip leading and trailing lines:
1484                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
1485
1486                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
1487
1488                 // Wrap <p> tags and unhashify HTML blocks
1489                 foreach ($grafs as $key => $value) {
1490                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
1491                                 // Is a paragraph.
1492                                 $value = $this->runSpanGamut($value);
1493                                 if ($wrap_in_p) {
1494                                         $value = preg_replace('/^([ ]*)/', "<p>", $value);
1495                                         $value .= "</p>";
1496                                 }
1497                                 $grafs[$key] = $this->unhash($value);
1498                         } else {
1499                                 // Is a block.
1500                                 // Modify elements of @grafs in-place...
1501                                 $graf = $value;
1502                                 $block = $this->html_hashes[$graf];
1503                                 $graf = $block;
1504 //                              if (preg_match('{
1505 //                                      \A
1506 //                                      (                                                       # $1 = <div> tag
1507 //                                        <div  \s+
1508 //                                        [^>]*
1509 //                                        \b
1510 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
1511 //                                        1
1512 //                                        \2
1513 //                                        [^>]*
1514 //                                        >
1515 //                                      )
1516 //                                      (                                                       # $3 = contents
1517 //                                      .*
1518 //                                      )
1519 //                                      (</div>)                                        # $4 = closing tag
1520 //                                      \z
1521 //                                      }xs', $block, $matches))
1522 //                              {
1523 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
1524 //
1525 //                                      // We can't call Markdown(), because that resets the hash;
1526 //                                      // that initialization code should be pulled into its own sub, though.
1527 //                                      $div_content = $this->hashHTMLBlocks($div_content);
1528 //                                      
1529 //                                      // Run document gamut methods on the content.
1530 //                                      foreach ($this->document_gamut as $method => $priority) {
1531 //                                              $div_content = $this->$method($div_content);
1532 //                                      }
1533 //
1534 //                                      $div_open = preg_replace(
1535 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
1536 //
1537 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1538 //                              }
1539                                 $grafs[$key] = $graf;
1540                         }
1541                 }
1542
1543                 return implode("\n\n", $grafs);
1544         }
1545
1546         /**
1547          * Encode text for a double-quoted HTML attribute. This function
1548          * is *not* suitable for attributes enclosed in single quotes.
1549          * @param  string $text
1550          * @return string
1551          */
1552         protected function encodeAttribute($text) {
1553                 $text = $this->encodeAmpsAndAngles($text);
1554                 $text = str_replace('"', '&quot;', $text);
1555                 return $text;
1556         }
1557
1558         /**
1559          * Encode text for a double-quoted HTML attribute containing a URL,
1560          * applying the URL filter if set. Also generates the textual
1561          * representation for the URL (removing mailto: or tel:) storing it in $text.
1562          * This function is *not* suitable for attributes enclosed in single quotes.
1563          *
1564          * @param  string $url
1565          * @param  string &$text Passed by reference
1566          * @return string        URL
1567          */
1568         protected function encodeURLAttribute($url, &$text = null) {
1569                 if ($this->url_filter_func) {
1570                         $url = call_user_func($this->url_filter_func, $url);
1571                 }
1572
1573                 if (preg_match('{^mailto:}i', $url)) {
1574                         $url = $this->encodeEntityObfuscatedAttribute($url, $text, 7);
1575                 } else if (preg_match('{^tel:}i', $url)) {
1576                         $url = $this->encodeAttribute($url);
1577                         $text = substr($url, 4);
1578                 } else {
1579                         $url = $this->encodeAttribute($url);
1580                         $text = $url;
1581                 }
1582
1583                 return $url;
1584         }
1585         
1586         /**
1587          * Smart processing for ampersands and angle brackets that need to 
1588          * be encoded. Valid character entities are left alone unless the
1589          * no-entities mode is set.
1590          * @param  string $text
1591          * @return string
1592          */
1593         protected function encodeAmpsAndAngles($text) {
1594                 if ($this->no_entities) {
1595                         $text = str_replace('&', '&amp;', $text);
1596                 } else {
1597                         // Ampersand-encoding based entirely on Nat Irons's Amputator
1598                         // MT plugin: <http://bumppo.net/projects/amputator/>
1599                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
1600                                                                 '&amp;', $text);
1601                 }
1602                 // Encode remaining <'s
1603                 $text = str_replace('<', '&lt;', $text);
1604
1605                 return $text;
1606         }
1607
1608         /**
1609          * Parse Markdown automatic links to anchor HTML tags
1610          * @param  string $text
1611          * @return string
1612          */
1613         protected function doAutoLinks($text) {
1614                 $text = preg_replace_callback('{<((https?|ftp|dict|tel):[^\'">\s]+)>}i',
1615                         array($this, '_doAutoLinks_url_callback'), $text);
1616
1617                 // Email addresses: <address@domain.foo>
1618                 $text = preg_replace_callback('{
1619                         <
1620                         (?:mailto:)?
1621                         (
1622                                 (?:
1623                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
1624                                 |
1625                                         ".*?"
1626                                 )
1627                                 \@
1628                                 (?:
1629                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
1630                                 |
1631                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
1632                                 )
1633                         )
1634                         >
1635                         }xi',
1636                         array($this, '_doAutoLinks_email_callback'), $text);
1637
1638                 return $text;
1639         }
1640
1641         /**
1642          * Parse URL callback
1643          * @param  array $matches
1644          * @return string
1645          */
1646         protected function _doAutoLinks_url_callback($matches) {
1647                 $url = $this->encodeURLAttribute($matches[1], $text);
1648                 $link = "<a href=\"$url\">$text</a>";
1649                 return $this->hashPart($link);
1650         }
1651
1652         /**
1653          * Parse email address callback
1654          * @param  array $matches
1655          * @return string
1656          */
1657         protected function _doAutoLinks_email_callback($matches) {
1658                 $addr = $matches[1];
1659                 $url = $this->encodeURLAttribute("mailto:$addr", $text);
1660                 $link = "<a href=\"$url\">$text</a>";
1661                 return $this->hashPart($link);
1662         }
1663
1664         /**
1665          * Input: some text to obfuscate, e.g. "mailto:foo@example.com"
1666          *
1667          * Output: the same text but with most characters encoded as either a
1668          *         decimal or hex entity, in the hopes of foiling most address
1669          *         harvesting spam bots. E.g.:
1670          *
1671          *        &#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
1672          *        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
1673          *        &#x6d;
1674          *
1675          * Note: the additional output $tail is assigned the same value as the
1676          * ouput, minus the number of characters specified by $head_length.
1677          *
1678          * Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
1679          * With some optimizations by Milian Wolff. Forced encoding of HTML
1680          * attribute special characters by Allan Odgaard.
1681          *
1682          * @param  string  $text
1683          * @param  string  &$tail
1684          * @param  integer $head_length
1685          * @return string
1686          */
1687         protected function encodeEntityObfuscatedAttribute($text, &$tail = null, $head_length = 0) {
1688                 if ($text == "") {
1689                         return $tail = "";
1690                 }
1691
1692                 $chars = preg_split('/(?<!^)(?!$)/', $text);
1693                 $seed = (int)abs(crc32($text) / strlen($text)); // Deterministic seed.
1694
1695                 foreach ($chars as $key => $char) {
1696                         $ord = ord($char);
1697                         // Ignore non-ascii chars.
1698                         if ($ord < 128) {
1699                                 $r = ($seed * (1 + $key)) % 100; // Pseudo-random function.
1700                                 // roughly 10% raw, 45% hex, 45% dec
1701                                 // '@' *must* be encoded. I insist.
1702                                 // '"' and '>' have to be encoded inside the attribute
1703                                 if ($r > 90 && strpos('@"&>', $char) === false) {
1704                                         /* do nothing */
1705                                 } else if ($r < 45) {
1706                                         $chars[$key] = '&#x'.dechex($ord).';';
1707                                 } else {
1708                                         $chars[$key] = '&#'.$ord.';';
1709                                 }
1710                         }
1711                 }
1712
1713                 $text = implode('', $chars);
1714                 $tail = $head_length ? implode('', array_slice($chars, $head_length)) : $text;
1715
1716                 return $text;
1717         }
1718
1719         /**
1720          * Take the string $str and parse it into tokens, hashing embeded HTML,
1721          * escaped characters and handling code spans.
1722          * @param  string $str
1723          * @return string
1724          */
1725         protected function parseSpan($str) {
1726                 $output = '';
1727                 
1728                 $span_re = '{
1729                                 (
1730                                         \\\\'.$this->escape_chars_re.'
1731                                 |
1732                                         (?<![`\\\\])
1733                                         `+                                              # code span marker
1734                         '.( $this->no_markup ? '' : '
1735                                 |
1736                                         <!--    .*?     -->             # comment
1737                                 |
1738                                         <\?.*?\?> | <%.*?%>             # processing instruction
1739                                 |
1740                                         <[!$]?[-a-zA-Z0-9:_]+   # regular tags
1741                                         (?>
1742                                                 \s
1743                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
1744                                         )?
1745                                         >
1746                                 |
1747                                         <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
1748                                 |
1749                                         </[-a-zA-Z0-9:_]+\s*> # closing tag
1750                         ').'
1751                                 )
1752                                 }xs';
1753
1754                 while (1) {
1755                         // Each loop iteration seach for either the next tag, the next 
1756                         // openning code span marker, or the next escaped character. 
1757                         // Each token is then passed to handleSpanToken.
1758                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
1759                         
1760                         // Create token from text preceding tag.
1761                         if ($parts[0] != "") {
1762                                 $output .= $parts[0];
1763                         }
1764                         
1765                         // Check if we reach the end.
1766                         if (isset($parts[1])) {
1767                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
1768                                 $str = $parts[2];
1769                         } else {
1770                                 break;
1771                         }
1772                 }
1773                 
1774                 return $output;
1775         }
1776
1777         /**
1778          * Handle $token provided by parseSpan by determining its nature and
1779          * returning the corresponding value that should replace it.
1780          * @param  string $token
1781          * @param  string &$str
1782          * @return string
1783          */
1784         protected function handleSpanToken($token, &$str) {
1785                 switch ($token{0}) {
1786                         case "\\":
1787                                 return $this->hashPart("&#". ord($token{1}). ";");
1788                         case "`":
1789                                 // Search for end marker in remaining text.
1790                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
1791                                         $str, $matches))
1792                                 {
1793                                         $str = $matches[2];
1794                                         $codespan = $this->makeCodeSpan($matches[1]);
1795                                         return $this->hashPart($codespan);
1796                                 }
1797                                 return $token; // Return as text since no ending marker found.
1798                         default:
1799                                 return $this->hashPart($token);
1800                 }
1801         }
1802
1803         /**
1804          * Remove one level of line-leading tabs or spaces
1805          * @param  string $text
1806          * @return string
1807          */
1808         protected function outdent($text) {
1809                 return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
1810         }
1811
1812
        /**
         * String length function used by `_detab_callback` to measure line width
         * when expanding tabs; it is called with the line and 'UTF-8'.
         * `_initDetab` replaces it with a fallback that loosely counts UTF-8
         * characters if the default function does not exist.
         * @var callable
         */
        protected $utf8_strlen = 'mb_strlen';
1819
1820         /**
1821          * Replace tabs with the appropriate amount of spaces.
1822          *
1823          * For each line we separate the line in blocks delemited by tab characters.
1824          * Then we reconstruct every line by adding the  appropriate number of space
1825          * between each blocks.
1826          * 
1827          * @param  string $text
1828          * @return string
1829          */
1830         protected function detab($text) {       
1831                 $text = preg_replace_callback('/^.*\t.*$/m',
1832                         array($this, '_detab_callback'), $text);
1833
1834                 return $text;
1835         }
1836
1837         /**
1838          * Replace tabs callback
1839          * @param  string $matches
1840          * @return string
1841          */
1842         protected function _detab_callback($matches) {
1843                 $line = $matches[0];
1844                 $strlen = $this->utf8_strlen; // strlen function for UTF-8.
1845                 
1846                 // Split in blocks.
1847                 $blocks = explode("\t", $line);
1848                 // Add each blocks to the line.
1849                 $line = $blocks[0];
1850                 unset($blocks[0]); // Do not add first block twice.
1851                 foreach ($blocks as $block) {
1852                         // Calculate amount of space, insert spaces, insert block.
1853                         $amount = $this->tab_width - 
1854                                 $strlen($line, 'UTF-8') % $this->tab_width;
1855                         $line .= str_repeat(" ", $amount) . $block;
1856                 }
1857                 return $line;
1858         }
1859
1860         /**
1861          * Check for the availability of the function in the `utf8_strlen` property
1862          * (initially `mb_strlen`). If the function is not available, create a 
1863          * function that will loosely count the number of UTF-8 characters with a
1864          * regular expression.
1865          * @return void
1866          */
1867         protected function _initDetab() {
1868         
1869                 if (function_exists($this->utf8_strlen)) {
1870                         return;
1871                 }
1872
1873                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
1874                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
1875                         $text, $m);');
1876         }
1877
1878         /**
1879          * Swap back in all the tags hashed by _HashHTMLBlocks.
1880          * @param  string $text
1881          * @return string
1882          */
1883         protected function unhash($text) {
1884                 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
1885                         array($this, '_unhash_callback'), $text);
1886         }
1887
1888         /**
1889          * Unhashing callback
1890          * @param  array $matches
1891          * @return string
1892          */
1893         protected function _unhash_callback($matches) {
1894                 return $this->html_hashes[$matches[0]];
1895         }
1896 }