123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185 |
/*
 * to-markdown - an HTML to Markdown converter
 *
 * Copyright 2011, Dom Christie
 * Licenced under the MIT licence
 *
 */
/**
 * Converts an HTML string to Markdown using a fixed sequence of
 * regular-expression rewrites (no DOM is built).
 *
 * Processing order:
 *   1. simple elements (p, br, h1-h6, hr, a, b/strong, i/em, code, img)
 *   2. <pre> blocks whose content was turned into `...` by the code rule
 *   3. escaping of "N. " at the start of a line so plain text cannot be
 *      mistaken for an ordered list
 *   4. lists, innermost first
 *   5. blockquotes, innermost first
 *   6. final whitespace clean-up
 *
 * Tags without a rule are left in the output untouched.
 *
 * @param {string} string - HTML markup to convert.
 * @returns {string} The Markdown rendering of the markup.
 */
var toMarkdown = function(string) {
  // One Markdown indentation level: required for code blocks and sufficient
  // for continuation content inside "* " / "N. " list items.
  var INDENT = '    ';

  // Returns `text` repeated `count` times (ES5-compatible).
  function repeat(text, count) {
    return new Array(count + 1).join(text);
  }

  // Builds a regex that finds attribute `attr` in a tag's attribute string.
  // The attribute name must start the string or follow whitespace (so `src`
  // does not match `data-src`). The value is captured in group 1 (double
  // quoted), group 2 (single quoted) or group 3 (unquoted).
  function attrRegExp(attr) {
    return new RegExp(
      '(?:^|\\s)' + attr + '\\s*=\\s*(?:"([^"]*)"|\'([^\']*)\'|([^\\s"\'>]+))',
      'i'
    );
  }

  // Returns the value of attribute `name` within `attrs`, or null if absent.
  function getAttr(attrs, name) {
    var found = attrs.match(attrRegExp(name));
    if (!found) {
      return null;
    }
    if (found[1] !== undefined) {
      return found[1];
    }
    return found[2] !== undefined ? found[2] : found[3];
  }

  // Formats the optional ` "title"` part of a link or image.
  function titleSuffix(title) {
    return title ? ' "' + title + '"' : '';
  }

  // Replaces every occurrence of one element in `html`.
  // elProperties.tag is a regex fragment for the tag name, elProperties.type
  // is 'void' for elements without a closing tag, and
  // elProperties.replacement is either a literal string or a callback that
  // receives (match, group1, group2, group3).
  function replaceEls(html, elProperties) {
    var tag = elProperties.tag;
    var replacement = elProperties.replacement;
    var pattern = elProperties.type === 'void'
      ? '<' + tag + '\\b([^>]*)\\/?>'
      : '<' + tag + '\\b([^>]*)>([\\s\\S]*?)<\\/' + tag + '>';
    var regex = new RegExp(pattern, 'gi');

    if (typeof replacement === 'string') {
      return html.replace(regex, replacement);
    }
    return html.replace(regex, function(str, p1, p2, p3) {
      return replacement(str, p1, p2, p3);
    });
  }

  // Collapses stray whitespace: trims tabs/CR/LF at both ends (leading
  // spaces are kept so an initial code block survives), empties
  // whitespace-only lines, and limits consecutive line breaks to two.
  function cleanUp(text) {
    text = text.replace(/^[\t\r\n]+|[\t\r\n]+$/g, '');
    text = text.replace(/\n\s+\n/g, '\n\n');
    text = text.replace(/\n{3,}/g, '\n\n');
    return text;
  }

  // Converts the text of one list item (everything up to, but excluding, its
  // closing tag) into "<prefix>content", indenting continuation lines.
  function convertListItem(itemHtml, prefix) {
    return itemHtml.replace(/\s*<li\b[^>]*>([\s\S]*)/i, function(str, innerHTML) {
      innerHTML = innerHTML.replace(/^\s+/, '');
      // Indent every non-empty continuation line exactly one level so that
      // paragraphs, nested lists and code all stay inside this item.
      innerHTML = innerHTML.replace(/\n(?=[^\n])/g, '\n' + INDENT);
      return prefix + innerHTML;
    });
  }

  // Converts a <ul>/<ol> that contains no other list markup into Markdown.
  function replaceLists(html) {
    html = html.replace(/<(ul|ol)\b[^>]*>([\s\S]*?)<\/\1>/gi, function(str, listType, innerHTML) {
      // The regexes are case-insensitive, so normalise before comparing.
      var isOrdered = listType.toLowerCase() === 'ol';
      var items = innerHTML.split(/<\/li\s*>/i);
      // Whatever follows the last closing tag is not part of any item.
      items.pop();
      for (var k = 0; k < items.length; k++) {
        if (items[k]) {
          items[k] = convertListItem(items[k], isOrdered ? (k + 1) + '. ' : '* ');
        }
      }
      return items.join('\n');
    });
    // Drop trailing blanks on each line (keeping the line break itself) and
    // any whitespace at the very end.
    html = html.replace(/[ \t]+(?=\n)/g, '').replace(/\s+$/, '');
    return '\n\n' + html;
  }

  // Converts a <blockquote> that contains no other blockquote into "> " lines.
  function replaceBlockquotes(html) {
    return html.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, function(str, inner) {
      inner = inner.replace(/^\s+|\s+$/g, '');
      inner = cleanUp(inner);
      inner = inner.replace(/^/gm, '> ');
      inner = inner.replace(/^(>([ \t]{2,}>)+)/gm, '> >');
      return inner;
    });
  }

  var ELEMENTS = [
    {
      patterns: 'p',
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '\n\n' + innerHTML + '\n' : '';
      }
    },
    {
      patterns: 'br',
      type: 'void',
      replacement: '\n'
    },
    {
      patterns: 'h([1-6])',
      replacement: function(str, hLevel, attrs, innerHTML) {
        return '\n\n' + repeat('#', Number(hLevel)) + ' ' + innerHTML + '\n';
      }
    },
    {
      patterns: 'hr',
      type: 'void',
      replacement: '\n\n* * *\n'
    },
    {
      patterns: 'a',
      replacement: function(str, attrs, innerHTML) {
        var href = getAttr(attrs, 'href');
        // Anchors without an href are not links; leave them as they are.
        if (href === null) {
          return str;
        }
        return '[' + innerHTML + '](' + href + titleSuffix(getAttr(attrs, 'title')) + ')';
      }
    },
    {
      patterns: ['b', 'strong'],
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '**' + innerHTML + '**' : '';
      }
    },
    {
      patterns: ['i', 'em'],
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '_' + innerHTML + '_' : '';
      }
    },
    {
      patterns: 'code',
      replacement: function(str, attrs, innerHTML) {
        return innerHTML ? '`' + innerHTML + '`' : '';
      }
    },
    {
      patterns: 'img',
      type: 'void',
      replacement: function(str, attrs) {
        var src = getAttr(attrs, 'src');
        // An image without a source cannot be expressed in Markdown.
        if (src === null) {
          return '';
        }
        var alt = getAttr(attrs, 'alt');
        return '![' + (alt || '') + '](' + src + titleSuffix(getAttr(attrs, 'title')) + ')';
      }
    }
  ];

  // Apply each rule, in declaration order, to each of its tag names.
  for (var e = 0; e < ELEMENTS.length; e++) {
    var element = ELEMENTS[e];
    var tags = typeof element.patterns === 'string' ? [element.patterns] : element.patterns;
    for (var t = 0; t < tags.length; t++) {
      string = replaceEls(string, {
        tag: tags[t],
        replacement: element.replacement,
        type: element.type
      });
    }
  }

  // Pre code blocks. The code rule above has already turned
  // <pre><code>...</code></pre> into <pre>`...`</pre>. The match is lazy so
  // that two separate blocks are not merged into one.
  string = string.replace(/<pre\b[^>]*>`([\s\S]*?)`<\/pre>/gi, function(str, innerHTML) {
    // Expand leading tabs on every line, one indentation level per tab.
    innerHTML = innerHTML.replace(/^\t+/gm, function(tabs) {
      return repeat(INDENT, tabs.length);
    });
    innerHTML = innerHTML.replace(/\n/g, '\n' + INDENT);
    return '\n\n' + INDENT + innerHTML + '\n';
  });

  // Lists

  // Escape numbers that could trigger an ol. With four or more leading
  // spaces the line is code, so only up to three are considered. The `m`
  // flag makes this apply to every line rather than only the first one.
  string = string.replace(/^(\s{0,3}\d+)\. /gm, '$1\\. ');

  // Convert lists that contain no other list first, then work outwards.
  var noChildrenRegex = /<(ul|ol)\b[^>]*>(?:(?!<ul|<ol)[\s\S])*?<\/\1>/gi;
  while (string.match(noChildrenRegex)) {
    string = string.replace(noChildrenRegex, function(str) {
      return replaceLists(str);
    });
  }

  // Blockquotes, innermost first.
  var deepest = /<blockquote\b[^>]*>((?:(?!<blockquote)[\s\S])*?)<\/blockquote>/gi;
  while (string.match(deepest)) {
    string = string.replace(deepest, function(str) {
      return replaceBlockquotes(str);
    });
  }

  return cleanUp(string);
};
// Publish the converter when an `exports` object is in scope (as in CommonJS
// modules). `typeof null` is also 'object', so null is excluded explicitly to
// avoid assigning a property on it.
if (typeof exports === 'object' && exports !== null) {
  exports.toMarkdown = toMarkdown;
}
|