ソースを参照

refactor(italicsAndBold): refactoring of italicsAndBold regexes for speed

Estevao Soares dos Santos 8 年 前
コミット
e4c43ea433

+ 11 - 5
dist/showdown.js

@@ -2089,19 +2089,25 @@ showdown.subParser('italicsAndBold', function (text, options, globals) {
 
   text = globals.converter._dispatch('italicsAndBold.before', text, options, globals);
 
+  // it's faster to have 2 separate regexes for each case than to have just one
+  // because of backtracking: in some cases, a single regex could lead to an
+  // exponential effect called "catastrophic backtracking". Ominous!
   if (options.literalMidWordUnderscores) {
     //underscores
     // Since we are consuming a \s character, we need to add it
-    text = text.replace(/(^|\s|>|\b)__(?=\S)([\s\S]+?)__(?=\b|<|\s|$)/gm, '$1<strong>$2</strong>');
-    text = text.replace(/(^|\s|>|\b)_(?=\S)([\s\S]+?)_(?=\b|<|\s|$)/gm, '$1<em>$2</em>');
+    text = text.replace(/\b__(\S[\s\S]*?)__\b/gm, '<strong>$1</strong>');
+    text = text.replace(/\b_(\S[\s\S]*?)_\b/gm, '<em>$1</em>');
     //asterisks
-    text = text.replace(/(\*\*)(?=\S)([^\r]*?\S[*]*)\1/g, '<strong>$2</strong>');
+    text = text.replace(/\*\*(?=\S)([^\r]*?\S[*]*)\*\*/g, '<strong>$1</strong>');
     text = text.replace(/(\*)(?=\S)([^\r]*?\S)\1/g, '<em>$2</em>');
 
   } else {
     // <strong> must go first:
-    text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g, '<strong>$2</strong>');
-    text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g, '<em>$2</em>');
+    text = text.replace(/__(\S[\s\S]*?)__/g, '<strong>$1</strong>');
+    text = text.replace(/\*\*(\S[\s\S]*?)\*\*/g, '<strong>$1</strong>');
+    // now <em>
+    text = text.replace(/_(\S[\s\S]*?)_/g, '<em>$1</em>');
+    text = text.replace(/\*(\S[\s\S]*?)\*/g, '<em>$1</em>');
   }
 
   text = globals.converter._dispatch('italicsAndBold.after', text, options, globals);

ファイルの差分が大きいため隠しています
+ 0 - 0
dist/showdown.js.map


ファイルの差分が大きいため隠しています
+ 0 - 0
dist/showdown.min.js


ファイルの差分が大きいため隠しています
+ 0 - 0
dist/showdown.min.js.map


+ 11 - 5
src/subParsers/italicsAndBold.js

@@ -3,19 +3,25 @@ showdown.subParser('italicsAndBold', function (text, options, globals) {
 
   text = globals.converter._dispatch('italicsAndBold.before', text, options, globals);
 
+  // it's faster to have 2 separate regexes for each case than to have just one
+  // because of backtracking: in some cases, a single regex could lead to an
+  // exponential effect called "catastrophic backtracking". Ominous!
   if (options.literalMidWordUnderscores) {
     //underscores
     // Since we are consuming a \s character, we need to add it
-    text = text.replace(/(^|\s|>|\b)__(?=\S)([\s\S]+?)__(?=\b|<|\s|$)/gm, '$1<strong>$2</strong>');
-    text = text.replace(/(^|\s|>|\b)_(?=\S)([\s\S]+?)_(?=\b|<|\s|$)/gm, '$1<em>$2</em>');
+    text = text.replace(/\b__(\S[\s\S]*?)__\b/gm, '<strong>$1</strong>');
+    text = text.replace(/\b_(\S[\s\S]*?)_\b/gm, '<em>$1</em>');
     //asterisks
-    text = text.replace(/(\*\*)(?=\S)([^\r]*?\S[*]*)\1/g, '<strong>$2</strong>');
+    text = text.replace(/\*\*(?=\S)([^\r]*?\S[*]*)\*\*/g, '<strong>$1</strong>');
     text = text.replace(/(\*)(?=\S)([^\r]*?\S)\1/g, '<em>$2</em>');
 
   } else {
     // <strong> must go first:
-    text = text.replace(/(\*\*|__)(?=\S)([^\r]*?\S[*_]*)\1/g, '<strong>$2</strong>');
-    text = text.replace(/(\*|_)(?=\S)([^\r]*?\S)\1/g, '<em>$2</em>');
+    text = text.replace(/__(\S[\s\S]*?)__/g, '<strong>$1</strong>');
+    text = text.replace(/\*\*(\S[\s\S]*?)\*\*/g, '<strong>$1</strong>');
+    // now <em>
+    text = text.replace(/_(\S[\s\S]*?)_/g, '<em>$1</em>');
+    text = text.replace(/\*(\S[\s\S]*?)\*/g, '<em>$1</em>');
   }
 
   text = globals.converter._dispatch('italicsAndBold.after', text, options, globals);

+ 2 - 0
test/cases/emphasis.html

@@ -32,3 +32,5 @@
 <p>escaped word*with*asterixs</p>
 <p>escaped word**with**asterixs</p>
 <p>escaped word<strong>*with*</strong>bold asterixs</p>
+<p>foo<strong>bar</strong>baz</p>
+<p>foo<strong>bar</strong>baz</p>

+ 4 - 0
test/cases/emphasis.md

@@ -64,3 +64,7 @@ escaped word\*with*asterixs
 escaped word\*\*with\*\*asterixs
 
 escaped word**\*with\***bold asterixs
+
+foo**bar**baz
+
+foo__bar__baz

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません