3 namespace Drupal\Tests\system\Functional\Mail;
5 use Drupal\Component\Utility\Html;
6 use Drupal\Component\Utility\Unicode;
7 use Drupal\Core\Mail\MailFormatHelper;
8 use Drupal\Core\Site\Settings;
9 use Drupal\Tests\BrowserTestBase;
12 * Tests for \Drupal\Core\Mail\MailFormatHelper::htmlToText().
16 class HtmlToTextTest extends BrowserTestBase {
18 * Converts a string to its PHP source equivalent for display in test messages.
21 * The text string to convert.
24 * An HTML representation of the text string that, when displayed in a
25 * browser, represents the PHP source code equivalent of $text.
27 protected function stringToHtml($text) {
37 * Helper function to test \Drupal\Core\Mail\MailFormatHelper::htmlToText().
40 * The source HTML string to be converted.
42 * The expected result of converting $html to text.
44 * A text message to display in the assertion message.
45 * @param $allowed_tags
46 * (optional) An array of allowed tags, or NULL to default to the full
47 * set of tags supported by
48 * \Drupal\Core\Mail\MailFormatHelper::htmlToText().
50 protected function assertHtmlToText($html, $text, $message, $allowed_tags = NULL) {
51 preg_match_all('/<([a-z0-6]+)/', Unicode::strtolower($html), $matches);
52 $tested_tags = implode(', ', array_unique($matches[1]));
53 $message .= ' (' . $tested_tags . ')';
54 $result = MailFormatHelper::htmlToText($html, $allowed_tags);
55 $pass = $this->assertEqual($result, $text, Html::escape($message));
56 $verbose = 'html = <pre>' . $this->stringToHtml($html)
57 . '</pre><br />' . 'result = <pre>' . $this->stringToHtml($result)
58 . '</pre><br />' . 'expected = <pre>' . $this->stringToHtml($text)
60 $this->verbose($verbose);
62 $this->pass("Previous test verbose info:<br />$verbose");
67 * Test supported tags of \Drupal\Core\Mail\MailFormatHelper::htmlToText().
69 public function testTags() {
70 global $base_path, $base_url;
72 // @todo Trailing linefeeds should be trimmed.
73 '<a href = "https://www.drupal.org">Drupal.org</a>' => "Drupal.org [1]\n\n[1] https://www.drupal.org\n",
74 // @todo Footer URLs should be absolute.
75 "<a href = \"$base_path\">Homepage</a>" => "Homepage [1]\n\n[1] $base_url/\n",
76 '<address>Drupal</address>' => "Drupal\n",
77 // @todo The <address> tag is currently not supported.
78 '<address>Drupal</address><address>Drupal</address>' => "DrupalDrupal\n",
79 '<b>Drupal</b>' => "*Drupal*\n",
80 // @todo There should be a space between the '>' and the text.
81 '<blockquote>Drupal</blockquote>' => ">Drupal\n",
82 '<blockquote>Drupal</blockquote><blockquote>Drupal</blockquote>' => ">Drupal\n>Drupal\n",
83 '<br />Drupal<br />Drupal<br /><br />Drupal' => "Drupal\nDrupal\nDrupal\n",
84 '<br/>Drupal<br/>Drupal<br/><br/>Drupal' => "Drupal\nDrupal\nDrupal\n",
85 // @todo There should be two line breaks before the paragraph.
86 '<br/>Drupal<br/>Drupal<br/><br/>Drupal<p>Drupal</p>' => "Drupal\nDrupal\nDrupal\nDrupal\n\n",
87 '<div>Drupal</div>' => "Drupal\n",
88 // @todo The <div> tag is currently not supported.
89 '<div>Drupal</div><div>Drupal</div>' => "DrupalDrupal\n",
90 '<em>Drupal</em>' => "/Drupal/\n",
91 '<h1>Drupal</h1>' => "======== DRUPAL ==============================================================\n\n",
92 '<h1>Drupal</h1><p>Drupal</p>' => "======== DRUPAL ==============================================================\n\nDrupal\n\n",
93 '<h2>Drupal</h2>' => "-------- DRUPAL --------------------------------------------------------------\n\n",
94 '<h2>Drupal</h2><p>Drupal</p>' => "-------- DRUPAL --------------------------------------------------------------\n\nDrupal\n\n",
95 '<h3>Drupal</h3>' => ".... Drupal\n\n",
96 '<h3>Drupal</h3><p>Drupal</p>' => ".... Drupal\n\nDrupal\n\n",
97 '<h4>Drupal</h4>' => ".. Drupal\n\n",
98 '<h4>Drupal</h4><p>Drupal</p>' => ".. Drupal\n\nDrupal\n\n",
99 '<h5>Drupal</h5>' => "Drupal\n\n",
100 '<h5>Drupal</h5><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
101 '<h6>Drupal</h6>' => "Drupal\n\n",
102 '<h6>Drupal</h6><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
103 '<hr />Drupal<hr />' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
104 '<hr/>Drupal<hr/>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
105 '<hr/>Drupal<hr/><p>Drupal</p>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n\n",
106 '<i>Drupal</i>' => "/Drupal/\n",
107 '<p>Drupal</p>' => "Drupal\n\n",
108 '<p>Drupal</p><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
109 '<strong>Drupal</strong>' => "*Drupal*\n",
110 // @todo Tables are currently not supported.
111 '<table><tr><td>Drupal</td><td>Drupal</td></tr><tr><td>Drupal</td><td>Drupal</td></tr></table>' => "DrupalDrupalDrupalDrupal\n",
112 '<table><tr><td>Drupal</td></tr></table><p>Drupal</p>' => "Drupal\nDrupal\n\n",
113 // @todo The <u> tag is currently not supported.
114 '<u>Drupal</u>' => "Drupal\n",
115 '<ul><li>Drupal</li></ul>' => " * Drupal\n\n",
116 '<ul><li>Drupal <em>Drupal</em> Drupal</li></ul>' => " * Drupal /Drupal/ Drupal\n\n",
117 // @todo Lines containing nothing but spaces should be trimmed.
118 '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => " * Drupal\n * 1) Drupal\n 2) Drupal\n \n\n",
119 '<ul><li>Drupal</li><li><ol><li>Drupal</li></ol></li><li>Drupal</li></ul>' => " * Drupal\n * 1) Drupal\n \n * Drupal\n\n",
120 '<ul><li>Drupal</li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n\n",
121 '<ul><li>Drupal</li></ul><p>Drupal</p>' => " * Drupal\n\nDrupal\n\n",
122 '<ol><li>Drupal</li></ol>' => " 1) Drupal\n\n",
123 '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => " 1) Drupal\n 2) * Drupal\n * Drupal\n \n\n",
124 '<ol><li>Drupal</li><li>Drupal</li></ol>' => " 1) Drupal\n 2) Drupal\n\n",
125 '<ol>Drupal</ol>' => "Drupal\n\n",
126 '<ol><li>Drupal</li></ol><p>Drupal</p>' => " 1) Drupal\n\nDrupal\n\n",
127 '<dl><dt>Drupal</dt></dl>' => "Drupal\n\n",
128 '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\n\n",
129 '<dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\nDrupal\n Drupal\n\n",
130 '<dl><dt>Drupal</dt><dd>Drupal</dd></dl><p>Drupal</p>' => "Drupal\n Drupal\n\nDrupal\n\n",
131 '<dl><dt>Drupal<dd>Drupal</dl>' => "Drupal\n Drupal\n\n",
132 '<dl><dt>Drupal</dt></dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
133 // @todo Again, lines containing only spaces should be trimmed.
134 '<ul><li>Drupal</li><li><dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl></li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n Drupal\n Drupal\n Drupal\n \n * Drupal\n\n",
135 // Tests malformed HTML tags.
136 '<br>Drupal<br>Drupal' => "Drupal\nDrupal\n",
137 '<hr>Drupal<hr>Drupal' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n",
138 '<ol><li>Drupal<li>Drupal</ol>' => " 1) Drupal\n 2) Drupal\n\n",
139 '<ul><li>Drupal <em>Drupal</em> Drupal</ul></ul>' => " * Drupal /Drupal/ Drupal\n\n",
140 '<ul><li>Drupal<li>Drupal</ol>' => " * Drupal\n * Drupal\n\n",
141 '<ul><li>Drupal<li>Drupal</ul>' => " * Drupal\n * Drupal\n\n",
142 '<ul>Drupal</ul>' => "Drupal\n\n",
143 'Drupal</ul></ol></dl><li>Drupal' => "Drupal\n * Drupal\n",
144 '<dl>Drupal</dl>' => "Drupal\n\n",
145 '<dl>Drupal</dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
146 '<dt>Drupal</dt>' => "Drupal\n",
147 // Tests some unsupported HTML tags.
148 '<html>Drupal</html>' => "Drupal\n",
149 // @todo Perhaps the contents of <script> tags should be dropped.
150 '<script type="text/javascript">Drupal</script>' => "Drupal\n",
151 // A couple of tests for Unicode characters.
152 '<q>I <em>will</em> be back…</q>' => "I /will/ be back…\n",
153 'FrançAIS is ÜBER-åwesome' => "FrançAIS is ÜBER-åwesome\n",
156 foreach ($tests as $html => $text) {
157 $this->assertHtmlToText($html, $text, 'Supported tags');
162 * Tests allowing tags in \Drupal\Core\Mail\MailFormatHelper::htmlToText().
164 public function testDrupalHtmlToTextArgs() {
165 // The second parameter of \Drupal\Core\Mail\MailFormatHelper::htmlToText()
166 // overrules the allowed tags.
167 $this->assertHtmlToText(
168 'Drupal <b>Drupal</b> Drupal',
169 "Drupal *Drupal* Drupal\n",
170 'Allowed <b> tag found',
173 $this->assertHtmlToText(
174 'Drupal <h1>Drupal</h1> Drupal',
175 "Drupal Drupal Drupal\n",
176 'Disallowed <h1> tag not found',
180 $this->assertHtmlToText(
181 'Drupal <p><em><b>Drupal</b></em><p> Drupal',
182 "Drupal Drupal Drupal\n",
183 'Disallowed <p>, <em>, and <b> tags not found',
187 $this->assertHtmlToText(
188 '<html><body>Drupal</body></html>',
190 'Unsupported <html> and <body> tags not found',
196 * Test that whitespace is collapsed.
198 public function testDrupalHtmltoTextCollapsesWhitespace() {
199 $input = "<p>Drupal Drupal\n\nDrupal<pre>Drupal Drupal\n\nDrupal</pre>Drupal Drupal\n\nDrupal</p>";
200 // @todo The whitespace should be collapsed.
201 $collapsed = "Drupal Drupal\n\nDrupalDrupal Drupal\n\nDrupalDrupal Drupal\n\nDrupal\n\n";
202 $this->assertHtmlToText(
205 'Whitespace is collapsed',
211 * Test that text separated by block-level tags in HTML gets separated by
212 * (at least) a newline in the plaintext version.
214 public function testDrupalHtmlToTextBlockTagToNewline() {
216 . '<blockquote>[blockquote]</blockquote>'
218 . '<dl><dt>[dl-dt]</dt>'
221 . '<dd>[dd-dl]</dd></dl>'
229 . '<ol><li>[ol-li]</li>'
231 . '<li>[li-ol]</li></ol>'
233 . '<ul><li>[ul-li]</li>'
234 . '<li>[li-ul]</li></ul>'
236 $output = MailFormatHelper::htmlToText($input);
237 $pass = $this->assertFalse(
238 preg_match('/\][^\n]*\[/s', $output),
239 'Block-level HTML tags should force newlines'
242 $this->verbose($this->stringToHtml($output));
244 $output_upper = Unicode::strtoupper($output);
245 $upper_input = Unicode::strtoupper($input);
246 $upper_output = MailFormatHelper::htmlToText($upper_input);
247 $pass = $this->assertEqual(
250 'Tag recognition should be case-insensitive'
255 . '<br />should be equal to <br />'
/**
 * Verifies how an <h1> header is set apart from the text around it.
 *
 * Four layouts are converted: plain text or a paragraph before the header,
 * combined with plain text or a paragraph after it.
 */
public function testHeaderSeparation() {
  // Plain-text rendering of <h1>Drupal</h1>, shared by every expectation.
  $heading = "======== DRUPAL ==============================================================\n\n";

  $cases = [
    // @todo There should be more space above the header than below it.
    [
      'html' => 'Drupal<h1>Drupal</h1>Drupal',
      'text' => "Drupal\n" . $heading . "Drupal\n",
      'message' => 'Text before and after <h1> tag',
    ],
    // @todo There should be more space above the header than below it.
    [
      'html' => '<p>Drupal</p><h1>Drupal</h1>Drupal',
      'text' => "Drupal\n\n" . $heading . "Drupal\n",
      'message' => 'Paragraph before and text after <h1> tag',
    ],
    // @todo There should be more space above the header than below it.
    [
      'html' => 'Drupal<h1>Drupal</h1><p>Drupal</p>',
      'text' => "Drupal\n" . $heading . "Drupal\n\n",
      'message' => 'Text before and paragraph after <h1> tag',
    ],
    [
      'html' => '<p>Drupal</p><h1>Drupal</h1><p>Drupal</p>',
      'text' => "Drupal\n\n" . $heading . "Drupal\n\n",
      'message' => 'Paragraph before and after <h1> tag',
    ],
  ];

  foreach ($cases as $case) {
    $this->assertHtmlToText($case['html'], $case['text'], $case['message']);
  }
}
287 * Test that footnote references are properly generated.
289 public function testFootnoteReferences() {
290 global $base_path, $base_url;
291 $source = '<a href="http://www.example.com/node/1">Host and path</a>'
292 . '<br /><a href="http://www.example.com">Host, no path</a>'
293 . '<br /><a href="' . $base_path . 'node/1">Path, no host</a>'
294 . '<br /><a href="node/1">Relative path</a>';
295 // @todo Footnote URLs should be absolute.
296 $tt = "Host and path [1]"
297 . "\nHost, no path [2]"
298 // @todo The following two references should be combined.
299 . "\nPath, no host [3]"
300 . "\nRelative path [4]"
302 . "\n[1] http://www.example.com/node/1"
303 . "\n[2] http://www.example.com"
304 // @todo The following two references should be combined.
305 . "\n[3] $base_url/node/1"
307 $this->assertHtmlToText($source, $tt, 'Footnotes');
311 * Test that combinations of paragraph breaks, line breaks, linefeeds,
312 * and spaces are properly handled.
314 public function testDrupalHtmlToTextParagraphs() {
317 'html' => "<p>line 1<br />\nline 2<br />line 3\n<br />line 4</p><p>paragraph</p>",
318 // @todo Trailing line breaks should be trimmed.
319 'text' => "line 1\nline 2\nline 3\nline 4\n\nparagraph\n\n",
322 'html' => "<p>line 1<br /> line 2</p> <p>line 4<br /> line 5</p> <p>0</p>",
323 // @todo Trailing line breaks should be trimmed.
324 'text' => "line 1\nline 2\n\nline 4\nline 5\n\n0\n\n",
326 foreach ($tests as $test) {
327 $this->assertHtmlToText($test['html'], $test['text'], 'Paragraph breaks');
/**
 * Tests \Drupal\Core\Mail\MailFormatHelper::htmlToText() wrapping.
 *
 * Converts a paragraph holding one 2100-character word and checks that no
 * output line, counted together with its line ending, exceeds 1000 octets.
 *
 * RFC 3676 says, "The Text/Plain media type is the lowest common
 * denominator of Internet email, with lines of no more than 998 characters."
 *
 * RFC 2046 says, "SMTP [RFC-821] allows a maximum of 998 octets before the
 * next CRLF sequence."
 *
 * RFC 821 says, "The maximum total length of a text line including the
 * <CRLF> is 1000 characters."
 */
public function testVeryLongLineWrap() {
  $long_word = str_repeat('x', 2100);
  $output = MailFormatHelper::htmlToText('Drupal<br /><p>' . $long_word . '</p><br />Drupal');
  $eol = Settings::get('mail_line_endings', PHP_EOL);

  // strlen() is used on purpose, not Unicode::strlen(): the limit is
  // expressed in octets, not characters.
  $octets_per_line = array_map(function ($line) use ($eol) {
    return strlen($line . $eol);
  }, explode($eol, $output));

  // The leading 0 mirrors the starting value of a running maximum.
  $maximum_line_length = max(array_merge([0], $octets_per_line));

  $verbose = "Maximum line length found was $maximum_line_length octets.";
  $this->assertTrue($maximum_line_length <= 1000, $verbose);
}
/**
 * Checks that a wrapped line does not keep a space right before its newline.
 *
 * @see \Drupal\Core\Mail\MailFormatHelper::wrapMail()
 */
public function testRemoveTrailingWhitespace() {
  // The first line of the input ends in a single space.
  $wrapped = MailFormatHelper::wrapMail("Hi there! \nHerp Derp");
  $mail_lines = explode("\n", $wrapped);
  $last_character = substr($mail_lines[0], -1);

  $this->assertNotEqual(" ", $last_character, 'Trailing whitespace removed.');
}
/**
 * Tests that trailing whitespace from Usenet style signatures is not removed.
 *
 * RFC 3676 says, "This is a special case; an (optionally quoted or quoted and
 * stuffed) line consisting of DASH DASH SP is neither fixed nor flowed."
 *
 * Two inputs are wrapped: a well-formed separator (dash, dash, one space),
 * whose trailing space must survive, and a malformed one (dash, dash, two
 * spaces), whose trailing whitespace is expected to be stripped.
 *
 * @see \Drupal\Core\Mail\MailFormatHelper::wrapMail()
 */
public function testUsenetSignature() {
  // Exactly DASH DASH SP: the separator line must come back untouched.
  $text = "Hi there!\n-- \nHerp Derp";
  $mail_lines = explode("\n", MailFormatHelper::wrapMail($text));
  $this->assertEqual("-- ", $mail_lines[1], 'Trailing whitespace not removed for dash-dash-space signatures.');

  // DASH DASH followed by two spaces is not a valid separator. The spaces
  // are built with str_repeat() so the distinction from the case above stays
  // visible and cannot be lost to whitespace-collapsing tools.
  $text = "Hi there!\n--" . str_repeat(' ', 2) . "\nHerp Derp";
  $mail_lines = explode("\n", MailFormatHelper::wrapMail($text));
  $this->assertEqual("--", $mail_lines[1], 'Trailing whitespace removed for incorrect dash-dash-space signatures.');
}