Markdown-1.0.2b2.pl 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509
  1. #!/usr/bin/perl
  2. #
  3. # Markdown -- A text-to-HTML conversion tool for web writers
  4. #
  5. # Copyright (c) 2004-2005 John Gruber
  6. # <http://daringfireball.net/projects/markdown/>
  7. #
  8. package Markdown;
  9. require 5.006_000;
  10. use strict;
  11. use warnings;
  12. use Digest::MD5 qw(md5_hex);
  13. use vars qw($VERSION);
  14. $VERSION = '1.0.2b2';
  15. # Sat 26 Mar 2005
  16. ## Disabled; causes problems under Perl 5.6.1:
  17. # use utf8;
  18. # binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
  19. #
  20. # Global default settings:
  21. #
  22. my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
  23. my $g_tab_width = 4;
  24. #
  25. # Globals:
  26. #
  27. # Regex to match balanced [brackets]. See Friedl's
  28. # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
  29. my $g_nested_brackets;
  30. $g_nested_brackets = qr{
  31. (?> # Atomic matching
  32. [^\[\]]+ # Anything other than brackets
  33. |
  34. \[
  35. (??{ $g_nested_brackets }) # Recursive set of nested brackets
  36. \]
  37. )*
  38. }x;
  39. # Table of hash values for escaped characters:
  40. my %g_escape_table;
  41. foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
  42. $g_escape_table{$char} = md5_hex($char);
  43. }
  44. # Global hashes, used by various utility routines
  45. my %g_urls;
  46. my %g_titles;
  47. my %g_html_blocks;
  48. # Used to track when we're inside an ordered or unordered list
  49. # (see _ProcessListItems() for details):
  50. my $g_list_level = 0;
  51. #### Blosxom plug-in interface ##########################################
  52. # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
  53. # which posts Markdown should process, using a "meta-markup: markdown"
  54. # header. If it's set to 0 (the default), Markdown will process all
  55. # entries.
  56. my $g_blosxom_use_meta = 0;
  57. sub start { 1; }
  58. sub story {
  59. my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
  60. if ( (! $g_blosxom_use_meta) or
  61. (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
  62. ){
  63. $$body_ref = Markdown($$body_ref);
  64. }
  65. 1;
  66. }
  67. #### Movable Type plug-in interface #####################################
  68. eval {require MT}; # Test to see if we're running in MT.
  69. unless ($@) {
  70. require MT;
  71. import MT;
  72. require MT::Template::Context;
  73. import MT::Template::Context;
  74. eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
  75. unless ($@) {
  76. require MT::Plugin;
  77. import MT::Plugin;
  78. my $plugin = new MT::Plugin({
  79. name => "Markdown",
  80. description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
  81. doc_link => 'http://daringfireball.net/projects/markdown/'
  82. });
  83. MT->add_plugin( $plugin );
  84. }
  85. MT::Template::Context->add_container_tag(MarkdownOptions => sub {
  86. my $ctx = shift;
  87. my $args = shift;
  88. my $builder = $ctx->stash('builder');
  89. my $tokens = $ctx->stash('tokens');
  90. if (defined ($args->{'output'}) ) {
  91. $ctx->stash('markdown_output', lc $args->{'output'});
  92. }
  93. defined (my $str = $builder->build($ctx, $tokens) )
  94. or return $ctx->error($builder->errstr);
  95. $str; # return value
  96. });
  97. MT->add_text_filter('markdown' => {
  98. label => 'Markdown',
  99. docs => 'http://daringfireball.net/projects/markdown/',
  100. on_format => sub {
  101. my $text = shift;
  102. my $ctx = shift;
  103. my $raw = 0;
  104. if (defined $ctx) {
  105. my $output = $ctx->stash('markdown_output');
  106. if (defined $output && $output =~ m/^html/i) {
  107. $g_empty_element_suffix = ">";
  108. $ctx->stash('markdown_output', '');
  109. }
  110. elsif (defined $output && $output eq 'raw') {
  111. $raw = 1;
  112. $ctx->stash('markdown_output', '');
  113. }
  114. else {
  115. $raw = 0;
  116. $g_empty_element_suffix = " />";
  117. }
  118. }
  119. $text = $raw ? $text : Markdown($text);
  120. $text;
  121. },
  122. });
  123. # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
  124. my $smartypants;
  125. {
  126. no warnings "once";
  127. $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
  128. }
  129. if ($smartypants) {
  130. MT->add_text_filter('markdown_with_smartypants' => {
  131. label => 'Markdown With SmartyPants',
  132. docs => 'http://daringfireball.net/projects/markdown/',
  133. on_format => sub {
  134. my $text = shift;
  135. my $ctx = shift;
  136. if (defined $ctx) {
  137. my $output = $ctx->stash('markdown_output');
  138. if (defined $output && $output eq 'html') {
  139. $g_empty_element_suffix = ">";
  140. }
  141. else {
  142. $g_empty_element_suffix = " />";
  143. }
  144. }
  145. $text = Markdown($text);
  146. $text = $smartypants->($text, '1');
  147. },
  148. });
  149. }
  150. }
  151. else {
  152. #### BBEdit/command-line text filter interface ##########################
  153. # Needs to be hidden from MT (and Blosxom when running in static mode).
  154. # We're only using $blosxom::version once; tell Perl not to warn us:
  155. no warnings 'once';
  156. unless ( defined($blosxom::version) ) {
  157. use warnings;
  158. #### Check for command-line switches: #################
  159. my %cli_opts;
  160. use Getopt::Long;
  161. Getopt::Long::Configure('pass_through');
  162. GetOptions(\%cli_opts,
  163. 'version',
  164. 'shortversion',
  165. 'html4tags',
  166. );
  167. if ($cli_opts{'version'}) { # Version info
  168. print "\nThis is Markdown, version $VERSION.\n";
  169. print "Copyright 2004 John Gruber\n";
  170. print "http://daringfireball.net/projects/markdown/\n\n";
  171. exit 0;
  172. }
  173. if ($cli_opts{'shortversion'}) { # Just the version number string.
  174. print $VERSION;
  175. exit 0;
  176. }
  177. if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
  178. $g_empty_element_suffix = ">";
  179. }
  180. #### Process incoming text: ###########################
  181. my $text;
  182. {
  183. local $/; # Slurp the whole file
  184. $text = <>;
  185. }
  186. print Markdown($text);
  187. }
  188. }
  189. sub Markdown {
  190. #
  191. # Main function. The order in which other subs are called here is
  192. # essential. Link and image substitutions need to happen before
  193. # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
  194. # and <img> tags get encoded.
  195. #
  196. my $text = shift;
  197. # Clear the global hashes. If we don't clear these, you get conflicts
  198. # from other articles when generating a page which contains more than
  199. # one article (e.g. an index page that shows the N most recent
  200. # articles):
  201. %g_urls = ();
  202. %g_titles = ();
  203. %g_html_blocks = ();
  204. # Standardize line endings:
  205. $text =~ s{\r\n}{\n}g; # DOS to Unix
  206. $text =~ s{\r}{\n}g; # Mac to Unix
  207. # Make sure $text ends with a couple of newlines:
  208. $text .= "\n\n";
  209. # Convert all tabs to spaces.
  210. $text = _Detab($text);
  211. # Strip any lines consisting only of spaces and tabs.
  212. # This makes subsequent regexen easier to write, because we can
  213. # match consecutive blank lines with /\n+/ instead of something
  214. # contorted like /[ \t]*\n+/ .
  215. $text =~ s/^[ \t]+$//mg;
  216. # Turn block-level HTML blocks into hash entries
  217. $text = _HashHTMLBlocks($text);
  218. # Strip link definitions, store in hashes.
  219. $text = _StripLinkDefinitions($text);
  220. $text = _RunBlockGamut($text);
  221. $text = _UnescapeSpecialChars($text);
  222. return $text . "\n";
  223. }
  224. sub _StripLinkDefinitions {
  225. #
  226. # Strips link definitions from text, stores the URLs and titles in
  227. # hash references.
  228. #
  229. my $text = shift;
  230. my $less_than_tab = $g_tab_width - 1;
  231. # Link defs are in the form: ^[id]: url "optional title"
  232. while ($text =~ s{
  233. ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1
  234. [ \t]*
  235. \n? # maybe *one* newline
  236. [ \t]*
  237. <?(\S+?)>? # url = $2
  238. [ \t]*
  239. \n? # maybe one newline
  240. [ \t]*
  241. (?:
  242. (?<=\s) # lookbehind for whitespace
  243. ["(]
  244. (.+?) # title = $3
  245. [")]
  246. [ \t]*
  247. )? # title is optional
  248. (?:\n+|\Z)
  249. }
  250. {}mx) {
  251. $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
  252. if ($3) {
  253. $g_titles{lc $1} = $3;
  254. $g_titles{lc $1} =~ s/"/&quot;/g;
  255. }
  256. }
  257. return $text;
  258. }
  259. sub _HashHTMLBlocks {
  260. my $text = shift;
  261. my $less_than_tab = $g_tab_width - 1;
  262. # Hashify HTML blocks:
  263. # We only want to do this for block-level HTML tags, such as headers,
  264. # lists, and tables. That's because we still want to wrap <p>s around
  265. # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
  266. # phrase emphasis, and spans. The list of tags we're looking for is
  267. # hard-coded:
  268. my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
  269. my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
  270. # First, look for nested blocks, e.g.:
  271. # <div>
  272. # <div>
  273. # tags for inner block must be indented.
  274. # </div>
  275. # </div>
  276. #
  277. # The outermost tags must start at the left margin for this to match, and
  278. # the inner nested divs must be indented.
  279. # We need to do this before the next, more liberal match, because the next
  280. # match will start at the first `<div>` and stop at the first `</div>`.
  281. $text =~ s{
  282. ( # save in $1
  283. ^ # start of line (with /m)
  284. <($block_tags_a) # start tag = $2
  285. \b # word break
  286. (.*\n)*? # any number of lines, minimally matching
  287. </\2> # the matching end tag
  288. [ \t]* # trailing spaces/tabs
  289. (?=\n+|\Z) # followed by a newline or end of document
  290. )
  291. }{
  292. my $key = md5_hex($1);
  293. $g_html_blocks{$key} = $1;
  294. "\n\n" . $key . "\n\n";
  295. }egmx;
  296. #
  297. # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
  298. #
  299. $text =~ s{
  300. ( # save in $1
  301. ^ # start of line (with /m)
  302. <($block_tags_b) # start tag = $2
  303. \b # word break
  304. (.*\n)*? # any number of lines, minimally matching
  305. .*</\2> # the matching end tag
  306. [ \t]* # trailing spaces/tabs
  307. (?=\n+|\Z) # followed by a newline or end of document
  308. )
  309. }{
  310. my $key = md5_hex($1);
  311. $g_html_blocks{$key} = $1;
  312. "\n\n" . $key . "\n\n";
  313. }egmx;
  314. # Special case just for <hr />. It was easier to make a special case than
  315. # to make the other regex more complicated.
  316. $text =~ s{
  317. (?:
  318. (?<=\n\n) # Starting after a blank line
  319. | # or
  320. \A\n? # the beginning of the doc
  321. )
  322. ( # save in $1
  323. [ ]{0,$less_than_tab}
  324. <(hr) # start tag = $2
  325. \b # word break
  326. ([^<>])*? #
  327. /?> # the matching end tag
  328. [ \t]*
  329. (?=\n{2,}|\Z) # followed by a blank line or end of document
  330. )
  331. }{
  332. my $key = md5_hex($1);
  333. $g_html_blocks{$key} = $1;
  334. "\n\n" . $key . "\n\n";
  335. }egx;
  336. # Special case for standalone HTML comments:
  337. $text =~ s{
  338. (?:
  339. (?<=\n\n) # Starting after a blank line
  340. | # or
  341. \A\n? # the beginning of the doc
  342. )
  343. ( # save in $1
  344. [ ]{0,$less_than_tab}
  345. (?s:
  346. <!
  347. (--.*?--\s*)+
  348. >
  349. )
  350. [ \t]*
  351. (?=\n{2,}|\Z) # followed by a blank line or end of document
  352. )
  353. }{
  354. my $key = md5_hex($1);
  355. $g_html_blocks{$key} = $1;
  356. "\n\n" . $key . "\n\n";
  357. }egx;
  358. return $text;
  359. }
  360. sub _RunBlockGamut {
  361. #
  362. # These are all the transformations that form block-level
  363. # tags like paragraphs, headers, and list items.
  364. #
  365. my $text = shift;
  366. $text = _DoHeaders($text);
  367. # Do Horizontal Rules:
  368. $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
  369. $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
  370. $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
  371. $text = _DoLists($text);
  372. $text = _DoCodeBlocks($text);
  373. $text = _DoBlockQuotes($text);
  374. # We already ran _HashHTMLBlocks() before, in Markdown(), but that
  375. # was to escape raw HTML in the original Markdown source. This time,
  376. # we're escaping the markup we've just created, so that we don't wrap
  377. # <p> tags around block-level tags.
  378. $text = _HashHTMLBlocks($text);
  379. $text = _FormParagraphs($text);
  380. return $text;
  381. }
  382. sub _RunSpanGamut {
  383. #
  384. # These are all the transformations that occur *within* block-level
  385. # tags like paragraphs, headers, and list items.
  386. #
  387. my $text = shift;
  388. $text = _EscapeSpecialCharsWithinTagAttributes($text);
  389. $text = _DoCodeSpans($text);
  390. $text = _EncodeBackslashEscapes($text);
  391. # Process anchor and image tags. Images must come first,
  392. # because ![foo][f] looks like an anchor.
  393. $text = _DoImages($text);
  394. $text = _DoAnchors($text);
  395. # Make links out of things like `<http://example.com/>`
  396. # Must come after _DoAnchors(), because you can use < and >
  397. # delimiters in inline links like [this](<url>).
  398. $text = _DoAutoLinks($text);
  399. $text = _EncodeAmpsAndAngles($text);
  400. $text = _DoItalicsAndBold($text);
  401. # Do hard breaks:
  402. $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
  403. return $text;
  404. }
  405. sub _EscapeSpecialCharsWithinTagAttributes {
  406. #
  407. # Within tags -- meaning between < and > -- encode [\ ` * _] so they
  408. # don't conflict with their use in Markdown for code, italics and strong.
  409. # We're replacing each such character with its corresponding MD5 checksum
  410. # value; this is likely overkill, but it should prevent us from colliding
  411. # with the escape values by accident.
  412. #
  413. my $text = shift;
  414. my $tokens ||= _TokenizeHTML($text);
  415. $text = ''; # rebuild $text from the tokens
  416. foreach my $cur_token (@$tokens) {
  417. if ($cur_token->[0] eq "tag") {
  418. $cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gx;
  419. $cur_token->[1] =~ s! ` !$g_escape_table{'`'}!gx;
  420. $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
  421. $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
  422. }
  423. $text .= $cur_token->[1];
  424. }
  425. return $text;
  426. }
  427. sub _DoAnchors {
  428. #
  429. # Turn Markdown link shortcuts into XHTML <a> tags.
  430. #
  431. my $text = shift;
  432. #
  433. # First, handle reference-style links: [link text] [id]
  434. #
  435. $text =~ s{
  436. ( # wrap whole match in $1
  437. \[
  438. ($g_nested_brackets) # link text = $2
  439. \]
  440. [ ]? # one optional space
  441. (?:\n[ ]*)? # one optional newline followed by spaces
  442. \[
  443. (.*?) # id = $3
  444. \]
  445. )
  446. }{
  447. my $result;
  448. my $whole_match = $1;
  449. my $link_text = $2;
  450. my $link_id = lc $3;
  451. if ($link_id eq "") {
  452. $link_id = lc $link_text; # for shortcut links like [this][].
  453. }
  454. if (defined $g_urls{$link_id}) {
  455. my $url = $g_urls{$link_id};
  456. $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
  457. $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
  458. $result = "<a href=\"$url\"";
  459. if ( defined $g_titles{$link_id} ) {
  460. my $title = $g_titles{$link_id};
  461. $title =~ s! \* !$g_escape_table{'*'}!gx;
  462. $title =~ s! _ !$g_escape_table{'_'}!gx;
  463. $result .= " title=\"$title\"";
  464. }
  465. $result .= ">$link_text</a>";
  466. }
  467. else {
  468. $result = $whole_match;
  469. }
  470. $result;
  471. }xsge;
  472. #
  473. # Next, inline-style links: [link text](url "optional title")
  474. #
  475. $text =~ s{
  476. ( # wrap whole match in $1
  477. \[
  478. ($g_nested_brackets) # link text = $2
  479. \]
  480. \( # literal paren
  481. [ \t]*
  482. <?(.*?)>? # href = $3
  483. [ \t]*
  484. ( # $4
  485. (['"]) # quote char = $5
  486. (.*?) # Title = $6
  487. \5 # matching quote
  488. )? # title is optional
  489. \)
  490. )
  491. }{
  492. my $result;
  493. my $whole_match = $1;
  494. my $link_text = $2;
  495. my $url = $3;
  496. my $title = $6;
  497. $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
  498. $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
  499. $result = "<a href=\"$url\"";
  500. if (defined $title) {
  501. $title =~ s/"/&quot;/g;
  502. $title =~ s! \* !$g_escape_table{'*'}!gx;
  503. $title =~ s! _ !$g_escape_table{'_'}!gx;
  504. $result .= " title=\"$title\"";
  505. }
  506. $result .= ">$link_text</a>";
  507. $result;
  508. }xsge;
  509. #
  510. # Last, handle reference-style shortcuts: [link text]
  511. # These must come last in case you've also got [link test][1]
  512. # or [link test](/foo)
  513. #
  514. $text =~ s{
  515. ( # wrap whole match in $1
  516. \[
  517. ([^\[\]]+) # link text = $2; can't contain '[' or ']'
  518. \]
  519. )
  520. }{
  521. my $result;
  522. my $whole_match = $1;
  523. my $link_text = $2;
  524. (my $link_id = lc $2) =~ s{[ ]?\n}{ }g; # lower-case and turn embedded newlines into spaces
  525. if (defined $g_urls{$link_id}) {
  526. my $url = $g_urls{$link_id};
  527. $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
  528. $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
  529. $result = "<a href=\"$url\"";
  530. if ( defined $g_titles{$link_id} ) {
  531. my $title = $g_titles{$link_id};
  532. $title =~ s! \* !$g_escape_table{'*'}!gx;
  533. $title =~ s! _ !$g_escape_table{'_'}!gx;
  534. $result .= " title=\"$title\"";
  535. }
  536. $result .= ">$link_text</a>";
  537. }
  538. else {
  539. $result = $whole_match;
  540. }
  541. $result;
  542. }xsge;
  543. return $text;
  544. }
  545. sub _DoImages {
  546. #
  547. # Turn Markdown image shortcuts into <img> tags.
  548. #
  549. my $text = shift;
  550. #
  551. # First, handle reference-style labeled images: ![alt text][id]
  552. #
  553. $text =~ s{
  554. ( # wrap whole match in $1
  555. !\[
  556. (.*?) # alt text = $2
  557. \]
  558. [ ]? # one optional space
  559. (?:\n[ ]*)? # one optional newline followed by spaces
  560. \[
  561. (.*?) # id = $3
  562. \]
  563. )
  564. }{
  565. my $result;
  566. my $whole_match = $1;
  567. my $alt_text = $2;
  568. my $link_id = lc $3;
  569. if ($link_id eq "") {
  570. $link_id = lc $alt_text; # for shortcut links like ![this][].
  571. }
  572. $alt_text =~ s/"/&quot;/g;
  573. if (defined $g_urls{$link_id}) {
  574. my $url = $g_urls{$link_id};
  575. $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
  576. $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
  577. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  578. if (defined $g_titles{$link_id}) {
  579. my $title = $g_titles{$link_id};
  580. $title =~ s! \* !$g_escape_table{'*'}!gx;
  581. $title =~ s! _ !$g_escape_table{'_'}!gx;
  582. $result .= " title=\"$title\"";
  583. }
  584. $result .= $g_empty_element_suffix;
  585. }
  586. else {
  587. # If there's no such link ID, leave intact:
  588. $result = $whole_match;
  589. }
  590. $result;
  591. }xsge;
  592. #
  593. # Next, handle inline images: ![alt text](url "optional title")
  594. # Don't forget: encode * and _
  595. $text =~ s{
  596. ( # wrap whole match in $1
  597. !\[
  598. (.*?) # alt text = $2
  599. \]
  600. \( # literal paren
  601. [ \t]*
  602. <?(\S+?)>? # src url = $3
  603. [ \t]*
  604. ( # $4
  605. (['"]) # quote char = $5
  606. (.*?) # title = $6
  607. \5 # matching quote
  608. [ \t]*
  609. )? # title is optional
  610. \)
  611. )
  612. }{
  613. my $result;
  614. my $whole_match = $1;
  615. my $alt_text = $2;
  616. my $url = $3;
  617. my $title = '';
  618. if (defined($6)) {
  619. $title = $6;
  620. }
  621. $alt_text =~ s/"/&quot;/g;
  622. $title =~ s/"/&quot;/g;
  623. $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
  624. $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
  625. $result = "<img src=\"$url\" alt=\"$alt_text\"";
  626. if (defined $title) {
  627. $title =~ s! \* !$g_escape_table{'*'}!gx;
  628. $title =~ s! _ !$g_escape_table{'_'}!gx;
  629. $result .= " title=\"$title\"";
  630. }
  631. $result .= $g_empty_element_suffix;
  632. $result;
  633. }xsge;
  634. return $text;
  635. }
  636. sub _DoHeaders {
  637. my $text = shift;
  638. # Setext-style headers:
  639. # Header 1
  640. # ========
  641. #
  642. # Header 2
  643. # --------
  644. #
  645. $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
  646. "<h1>" . _RunSpanGamut($1) . "</h1>\n\n";
  647. }egmx;
  648. $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
  649. "<h2>" . _RunSpanGamut($1) . "</h2>\n\n";
  650. }egmx;
  651. # atx-style headers:
  652. # # Header 1
  653. # ## Header 2
  654. # ## Header 2 with closing hashes ##
  655. # ...
  656. # ###### Header 6
  657. #
  658. $text =~ s{
  659. ^(\#{1,6}) # $1 = string of #'s
  660. [ \t]*
  661. (.+?) # $2 = Header text
  662. [ \t]*
  663. \#* # optional closing #'s (not counted)
  664. \n+
  665. }{
  666. my $h_level = length($1);
  667. "<h$h_level>" . _RunSpanGamut($2) . "</h$h_level>\n\n";
  668. }egmx;
  669. return $text;
  670. }
  671. sub _DoLists {
  672. #
  673. # Form HTML ordered (numbered) and unordered (bulleted) lists.
  674. #
  675. my $text = shift;
  676. my $less_than_tab = $g_tab_width - 1;
  677. # Re-usable patterns to match list item bullets and number markers:
  678. my $marker_ul = qr/[*+-]/;
  679. my $marker_ol = qr/\d+[.]/;
  680. my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
  681. # Re-usable pattern to match any entirel ul or ol list:
  682. my $whole_list = qr{
  683. ( # $1 = whole list
  684. ( # $2
  685. [ ]{0,$less_than_tab}
  686. (${marker_any}) # $3 = first list item marker
  687. [ \t]+
  688. )
  689. (?s:.+?)
  690. ( # $4
  691. \z
  692. |
  693. \n{2,}
  694. (?=\S)
  695. (?! # Negative lookahead for another list item marker
  696. [ \t]*
  697. ${marker_any}[ \t]+
  698. )
  699. )
  700. )
  701. }mx;
  702. # We use a different prefix before nested lists than top-level lists.
  703. # See extended comment in _ProcessListItems().
  704. #
  705. # Note: There's a bit of duplication here. My original implementation
  706. # created a scalar regex pattern as the conditional result of the test on
  707. # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
  708. # substitution once, using the scalar as the pattern. This worked,
  709. # everywhere except when running under MT on my hosting account at Pair
  710. # Networks. There, this caused all rebuilds to be killed by the reaper (or
  711. # perhaps they crashed, but that seems incredibly unlikely given that the
  712. # same script on the same server ran fine *except* under MT. I've spent
  713. # more time trying to figure out why this is happening than I'd like to
  714. # admit. My only guess, backed up by the fact that this workaround works,
  715. # is that Perl optimizes the substition when it can figure out that the
  716. # pattern will never change, and when this optimization isn't on, we run
  717. # afoul of the reaper. Thus, the slightly redundant code that uses two
  718. # static s/// patterns rather than one conditional pattern.
  719. if ($g_list_level) {
  720. $text =~ s{
  721. ^
  722. $whole_list
  723. }{
  724. my $list = $1;
  725. my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
  726. # Turn double returns into triple returns, so that we can make a
  727. # paragraph for the last item in a list, if necessary:
  728. $list =~ s/\n{2,}/\n\n\n/g;
  729. my $result = _ProcessListItems($list, $marker_any);
  730. # Trim any trailing whitespace, to put the closing `</$list_type>`
  731. # up on the preceding line, to get it past the current stupid
  732. # HTML block parser. This is a hack to work around the terrible
  733. # hack that is the HTML block parser.
  734. $result =~ s{\s+$}{};
  735. $result = "<$list_type>" . $result . "</$list_type>\n";
  736. $result;
  737. }egmx;
  738. }
  739. else {
  740. $text =~ s{
  741. (?:(?<=\n\n)|\A\n?)
  742. $whole_list
  743. }{
  744. my $list = $1;
  745. my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
  746. # Turn double returns into triple returns, so that we can make a
  747. # paragraph for the last item in a list, if necessary:
  748. $list =~ s/\n{2,}/\n\n\n/g;
  749. my $result = _ProcessListItems($list, $marker_any);
  750. $result = "<$list_type>\n" . $result . "</$list_type>\n";
  751. $result;
  752. }egmx;
  753. }
  754. return $text;
  755. }
  sub _ProcessListItems {
  #
  # Process the contents of a single ordered or unordered list, splitting it
  # into individual list items.
  #
  # Params:
  #   $list_str   - the text of one whole list (all of its items)
  #   $marker_any - regex source matching any list marker; it is interpolated
  #                 into the item-splitting pattern below
  #
  # Returns: the list text with every item rewritten as "<li>...</li>\n".
  #
      my $list_str   = shift;
      my $marker_any = shift;

      # The $g_list_level global keeps track of when we're inside a list.
      # Each time we enter a list, we increment it; when we leave a list,
      # we decrement. If it's zero, we're not in a list anymore.
      #
      # We do this because when we're not inside a list, we want to treat
      # something like this:
      #
      #       I recommend upgrading to version
      #       8. Oops, now this line is treated
      #       as a sub-list.
      #
      # As a single paragraph, despite the fact that the second line starts
      # with a digit-period-space sequence.
      #
      # Whereas when we're inside a list (or sub-list), that line will be
      # treated as the start of a sub-list. What a kludge, huh? This is
      # an aspect of Markdown's syntax that's hard to parse perfectly
      # without resorting to mind-reading. Perhaps the solution is to
      # change the syntax rules such that sub-lists must start with a
      # starting cardinal number; e.g. "1." or "a.".
      $g_list_level++;

      # trim trailing blank lines:
      $list_str =~ s/\n{2,}\z/\n/;

      $list_str =~ s{
          (\n)?                           # leading line = $1
          (^[ \t]*)                       # leading whitespace = $2
          ($marker_any) [ \t]+            # list marker = $3
          ((?s:.+?)                       # list item text = $4
          (\n{1,2}))
          (?= \n* (\z | \2 ($marker_any) [ \t]+))
      }{
          # Copy the captures right away: the calls below run their own
          # regexes and would clobber the numbered match variables.
          my $item         = $4;
          my $leading_line = $1;

          if ($leading_line or ($item =~ m/\n{2,}/)) {
              # Preceded by a blank line, or containing one: the item is
              # processed as block-level content.
              $item = _RunBlockGamut(_Outdent($item));
          }
          else {
              # Recursion for sub-lists:
              $item = _DoLists(_Outdent($item));
              chomp $item;
              $item = _RunSpanGamut($item);
          }

          "<li>" . $item . "</li>\n";
      }egmx;

      $g_list_level--;
      return $list_str;
  }
  sub _DoCodeBlocks {
  #
  # Convert indented runs of lines into Markdown `<pre><code>` blocks.
  #
  # Takes the document text and returns it with every such run replaced by
  # its encoded, outdented and detabbed contents wrapped in <pre><code>.
  #
      my $text = shift;

      $text =~ s{
              (?:\n\n|\A)
              (               # $1 = the code block -- one or more lines, starting with a space/tab
                (?:
                  (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
                  .*\n+
                )+
              )
              ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
          }{
              # Take a private copy of the capture before calling helpers
              # that run regexes of their own.
              my $raw = $1;

              my $body = _Detab( _EncodeCode( _Outdent($raw) ) );
              $body =~ s/\A\n+//;     # trim leading newlines
              $body =~ s/\s+\z//;     # trim trailing whitespace

              "\n\n<pre><code>" . $body . "\n</code></pre>\n\n";
          }egmx;

      return $text;
  }
  sub _DoCodeSpans {
  #
  # Turn backtick-quoted runs into <code></code> spans.
  #
  #   * Any number of backticks may open a span, as long as the same number
  #     closes it. That lets a span contain literal backticks. This input:
  #
  #         Just type ``foo `bar` baz`` at the prompt.
  #
  #     Will translate to:
  #
  #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
  #
  #     There's no arbitrary limit to the number of backticks you
  #     can use as delimiters. If you need three consecutive backticks
  #     in your code, use four for delimiters, etc.
  #
  #   * Spaces just inside the delimiters are dropped, which allows literal
  #     backticks at the edges of a span:
  #
  #         ... type `` `bar` `` ...
  #
  #     Turns to:
  #
  #         ... type <code>`bar`</code> ...
  #
      my $text = shift;

      $text =~ s{
              (?<!\\)     # Character before opening ` can't be a backslash
              (`+)        # $1 = Opening run of `
              (.+?)       # $2 = The code block
              (?<!`)
              \1          # Matching closer
              (?!`)
          }{
              my $code = "$2";
              $code =~ s/^[ \t]*//g; # leading whitespace
              $code =~ s/[ \t]*$//g; # trailing whitespace
              "<code>" . _EncodeCode($code) . "</code>";
          }egsx;

      return $text;
  }
  sub _EncodeCode {
  #
  # Encode/escape certain characters inside Markdown code runs, where they
  # are plain literals and must not keep their special Markdown meanings.
  #
  # Takes the code text and returns the encoded copy.
  #
      my $code = shift;

      # Every ampersand is encoded: HTML entities are not entities
      # inside a Markdown code span.
      $code =~ s/&/&amp;/g;

      # Dollar signs are encoded only when running under Blosxom
      # (Blosxom interpolates Perl variables in article bodies).
      {
          no warnings 'once';
          $code =~ s/\$/&#036;/g if defined($blosxom::version);
      }

      # Do the angle bracket song and dance:
      $code =~ s/</&lt;/g;
      $code =~ s/>/&gt;/g;

      # Finally, swap each character that is magic in Markdown for its
      # entry in the escape table, in this fixed order.
      for my $magic ('*', '_', '{', '}', '[', ']', '\\') {
          $code =~ s/\Q$magic\E/$g_escape_table{$magic}/g;
      }

      return $code;
  }
  sub _DoItalicsAndBold {
  #
  # Convert **strong** / __strong__ and *emphasis* / _emphasis_ markers into
  # <strong> and <em> elements. Takes the text, returns the converted copy.
  #
      my $text = shift;

      for ($text) {
          # <strong> must go first, otherwise the doubled markers would be
          # consumed as two single emphasis markers.
          s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
           {<strong>$2</strong>}gsx;

          s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
           {<em>$2</em>}gsx;
      }

      return $text;
  }
  sub _DoBlockQuotes {
  #
  # Convert e-mail style ">" quoted text into <blockquote> elements.
  #
  # Takes the document text. Each quoted region has one level of ">"
  # stripped, is run through the block gamut (so nested quotes recurse),
  # and is wrapped in <blockquote> tags. Returns the converted text.
  #
      my $text = shift;

      $text =~ s{
            (                       # Wrap whole match in $1
              (
                ^[ \t]*>[ \t]?      # '>' at the start of a line
                  .+\n              # rest of the first line
                (.+\n)*             # subsequent consecutive lines
                \n*                 # blanks
              )+
            )
          }{
              my $bq = $1;
              $bq =~ s/^[ \t]*>[ \t]?//gm;    # trim one level of quoting
              $bq =~ s/^[ \t]+$//mg;          # trim whitespace-only lines
              $bq = _RunBlockGamut($bq);      # recurse

              # Indent every line of the quoted content. The /m flag is
              # required: without it ^ matches only at the very start of the
              # string, so just the first line would be indented while the
              # <pre> fix-up below still removed a leading space from every
              # <pre> line, eating real indentation from quoted code.
              $bq =~ s/^/ /mg;
              # These leading spaces screw with <pre> content, so we need to fix that:
              $bq =~ s{
                      (\s*<pre>.+?</pre>)
                  }{
                      my $pre = $1;
                      $pre =~ s/^ //mg;
                      $pre;
                  }egsx;

              "<blockquote>\n$bq\n</blockquote>\n\n";
          }egmx;

      return $text;
  }
  sub _FormParagraphs {
  #
  # Wrap the paragraphs of a text in <p> tags and restore hashed HTML blocks.
  #
  #   Params:
  #       $text - string to process with html <p> tags
  #
  #   Returns: the processed paragraphs joined by blank lines.
  #
      my $text = shift;

      # Strip leading and trailing lines:
      $text =~ s/\A\n+//;
      $text =~ s/\n+\z//;

      my @grafs = split(/\n{2,}/, $text);

      #
      # First pass: wrap <p> tags around everything that is not the key of
      # a hashed HTML block.
      #
      for my $graf (@grafs) {
          next if defined( $g_html_blocks{$graf} );

          $graf = _RunSpanGamut($graf);
          $graf =~ s/^([ \t]*)/<p>/;
          $graf .= "</p>";
      }

      #
      # Second pass: unhashify HTML blocks.
      #
      @grafs = map {
          defined( $g_html_blocks{$_} ) ? $g_html_blocks{$_} : $_
      } @grafs;

      return join "\n\n", @grafs;
  }
  sub _EncodeAmpsAndAngles {
  #
  # Smart processing for ampersands and angle brackets that need to be
  # encoded: ampersands that do not start an entity become &amp;, and "<"
  # characters that do not start a tag become &lt;.
  #
      my ($text) = @_;

      # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
      #   http://bumppo.net/projects/amputator/
      my $bare_amp = qr/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/;

      # A "<" that is not followed by a letter, "/", "?", "$" or "!".
      my $naked_lt = qr{<(?![a-z/?\$!])}i;

      $text =~ s/$bare_amp/&amp;/g;
      $text =~ s/$naked_lt/&lt;/g;

      return $text;
  }
  sub _EncodeBackslashEscapes {
  #
  #   Parameter:  String.
  #   Returns:    The string, after replacing each supported backslash
  #               escape sequence with that character's escape-table entry.
  #
      my $text = shift;

      # The order is significant: escaped backslashes must be processed
      # first, so that the second backslash of a pair is never taken as
      # the start of another escape.
      my @escapable = (
          '\\', '`', '*', '_', '{', '}', '[', ']',
          '(',  ')', '>', '#', '+', '-', '.', '!',
      );

      for my $char (@escapable) {
          $text =~ s/\\\Q$char\E/$g_escape_table{$char}/g;
      }

      return $text;
  }
  sub _DoAutoLinks {
  #
  # Convert <http://...>, <https://...> and <ftp://...> into links, and
  # <address@domain.foo> (optionally prefixed with "mailto:") into
  # obfuscated mailto links. Takes the text, returns the converted copy.
  #
      my $text = shift;

      my $url_link = qr{<((https?|ftp):[^'">\s]+)>}i;

      # Email addresses: <address@domain.foo>
      my $email_link = qr{
          <
          (?:mailto:)?
          (
              [-.\w]+
              \@
              [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
          )
          >
      }ix;

      $text =~ s{$url_link}{<a href="$1">$1</a>}g;

      $text =~ s{$email_link}{
          my $address = _UnescapeSpecialChars($1);
          _EncodeEmailAddress($address);
      }eg;

      return $text;
  }
  sub _EncodeEmailAddress {
  #
  #   Input: an email address, e.g. "foo@example.com"
  #
  #   Output: the email address as a mailto link, with each character
  #       of the address encoded as either a decimal or hex entity, in
  #       the hopes of foiling most address harvesting spam bots. E.g.:
  #
  #     <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
  #       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
  #       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
  #
  #   The encoding is chosen at random per character, so the exact output
  #   differs from call to call; only the decoded text is stable.
  #
  #   Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
  #   mailing list: <http://tinyurl.com/yu7ue>
  #
      my $addr = shift;

      # No explicit srand here: Perl seeds the generator itself on the first
      # call to rand, and reseeding on every call would also throw away any
      # seed the host program set on purpose.

      my @encode = (
          sub { '&#' . ord(shift) . ';' },                    # decimal entity
          sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },  # hex entity
          sub { shift },                                      # left as-is
      );

      $addr = "mailto:" . $addr;

      $addr =~ s{(.)}{
          my $char = $1;
          if ( $char eq '@' ) {
              # this *must* be encoded. I insist.
              # Choose between the decimal and hex encoders only; "rand 2"
              # is needed because "int rand 1" is always 0.
              $char = $encode[int rand 2]->($char);
          }
          elsif ( $char ne ':' ) {
              # leave ':' alone (to spot mailto: later)
              my $r = rand;
              # roughly 10% raw, 45% hex, 45% dec
              $char = (
                  $r > .9   ?  $encode[2]->($char)  :
                  $r < .45  ?  $encode[1]->($char)  :
                               $encode[0]->($char)
              );
          }
          $char;
      }gex;

      $addr = qq{<a href="$addr">$addr</a>};
      $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part

      return $addr;
  }
  sub _UnescapeSpecialChars {
  #
  # Swap back in all the special characters we've hidden.
  #
  # Takes a string and returns a copy in which every value stored in
  # %g_escape_table has been replaced by the character that is its key.
  #
      my $text = shift;

      # Walk a fresh list of keys instead of using each(): each() resumes
      # from wherever the hash's single shared iterator was last left, so an
      # earlier unfinished iteration over this global table would make the
      # loop silently skip entries.
      for my $char (keys %g_escape_table) {
          my $hash = $g_escape_table{$char};

          # \Q...\E makes sure the stored value is matched literally.
          $text =~ s/\Q$hash\E/$char/g;
      }

      return $text;
  }
  sub _TokenizeHTML {
  #
  #   Parameter:  String containing HTML markup.
  #   Returns:    Reference to an array of the tokens comprising the input
  #               string. Each token is either a tag (possibly with nested
  #               tags contained therein, such as <a href="<MTFoo>">), or a
  #               run of text between tags. Each element of the array is a
  #               two-element array; the first is either 'tag' or 'text';
  #               the second is the actual value.
  #
  #   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
  #       <http://www.bradchoate.com/past/mtregex.php>
  #
      my $str = shift;

      my @tokens;
      my $cursor = 0;     # offset just past the last tag consumed so far

      # Build a pattern for tags nested up to $depth levels deep.
      my $depth = 6;
      my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
      my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
                     (?s: <\? .*? \?> ) |              # processing instruction
                     $nested_tags/ix;                  # nested tags

      while ($str =~ m/($match)/g) {
          my $tag       = $1;
          my $tag_end   = pos $str;
          my $tag_start = $tag_end - length $tag;

          # Anything between the previous tag and this one is a text token.
          if ($cursor < $tag_start) {
              push @tokens, ['text', substr($str, $cursor, $tag_start - $cursor)];
          }

          push @tokens, ['tag', $tag];
          $cursor = $tag_end;
      }

      # Trailing text after the last tag, if any.
      my $len = length $str;
      if ($cursor < $len) {
          push @tokens, ['text', substr($str, $cursor, $len - $cursor)];
      }

      return \@tokens;
  }
  sub _Outdent {
  #
  # Remove one level of line-leading tabs or spaces: from the start of each
  # line, either a single tab or between one and $g_tab_width spaces.
  #
      my ($text) = @_;

      my $one_indent = qr/^(\t|[ ]{1,$g_tab_width})/m;
      $text =~ s/$one_indent//g;

      return $text;
  }
  sub _Detab {
  #
  # Expand each tab into spaces up to the next multiple of $g_tab_width.
  #
  # Cribbed from a post by Bart Lateur:
  # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
  #
      my ($text) = @_;

      $text =~ s{(.*?)\t}{
          my $before = $1;
          # Number of spaces needed to reach the next tab stop.
          my $pad = $g_tab_width - length($before) % $g_tab_width;
          $before . (' ' x $pad);
      }ge;

      return $text;
  }
  # Finish with a true value, which Perl expects from a file that is loaded
  # with require or use.
  1;
  # The compiler stops reading at the __END__ token; what follows is the
  # POD documentation.
  __END__
  1140. =pod
  1141. =head1 NAME
  1142. B<Markdown>
  1143. =head1 SYNOPSIS
  1144. B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ]
  1145. [ I<file> ... ]
  1146. =head1 DESCRIPTION
  1147. Markdown is a text-to-HTML filter; it translates an easy-to-read /
  1148. easy-to-write structured text format into HTML. Markdown's text format
  1149. is most similar to that of plain text email, and supports features such
  1150. as headers, *emphasis*, code blocks, blockquotes, and links.
  1151. Markdown's syntax is designed not as a generic markup language, but
  1152. specifically to serve as a front-end to (X)HTML. You can use span-level
  1153. HTML tags anywhere in a Markdown document, and you can use block level
  1154. HTML tags (like <div> and <table>) as well.
  1155. For more information about Markdown's syntax, see:
  1156. http://daringfireball.net/projects/markdown/
  1157. =head1 OPTIONS
  1158. Use "--" to end switch parsing. For example, to open a file named "-z", use:
  1159. Markdown.pl -- -z
  1160. =over 4
  1161. =item B<--html4tags>
  1162. Use HTML 4 style for empty element tags, e.g.:
  1163. <br>
  1164. instead of Markdown's default XHTML style tags, e.g.:
  1165. <br />
  1166. =item B<-v>, B<--version>
  1167. Display Markdown's version number and copyright information.
  1168. =item B<-s>, B<--shortversion>
  1169. Display the short-form version number.
  1170. =back
  1171. =head1 BUGS
  1172. To file bug reports or feature requests (other than topics listed in the
  1173. Caveats section above) please send email to:
  1174. support@daringfireball.net
  1175. Please include with your report: (1) the example input; (2) the output
  1176. you expected; (3) the output Markdown actually produced.
  1177. =head1 VERSION HISTORY
  1178. See the readme file for detailed release notes for this version.
  1179. 1.0.2b2 - 20 Mar 2005
  1180. + Fix for nested sub-lists in list-paragraph mode. Previously we got
  1181. a spurious extra level of `<p>` tags for something like this:
  1182. * this
  1183. * sub
  1184. that
  1185. + Experimental support for [this] as a synonym for [this][].
  1186. (Note to self: No test yet for this.)
  1187. Be sure to test, e.g.: [permutations of this sort of [thing][].]
  1188. 1.0.2b1 - 28 Feb 2005
  1189. + Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
  1190. + Fix for escaped backticks still triggering code spans:
  1191. There are two raw backticks here: \` and here: \`, not a code span
  1192. 1.0.1 - 14 Dec 2004
  1193. 1.0 - 28 Aug 2004
  1194. =head1 AUTHOR
  1195. John Gruber
  1196. http://daringfireball.net
  1197. PHP port and other contributions by Michel Fortin
  1198. http://michelf.com
  1199. =head1 COPYRIGHT AND LICENSE
  1200. Copyright (c) 2003-2005 John Gruber
  1201. <http://daringfireball.net/>
  1202. All rights reserved.
  1203. Redistribution and use in source and binary forms, with or without
  1204. modification, are permitted provided that the following conditions are
  1205. met:
  1206. * Redistributions of source code must retain the above copyright notice,
  1207. this list of conditions and the following disclaimer.
  1208. * Redistributions in binary form must reproduce the above copyright
  1209. notice, this list of conditions and the following disclaimer in the
  1210. documentation and/or other materials provided with the distribution.
  1211. * Neither the name "Markdown" nor the names of its contributors may
  1212. be used to endorse or promote products derived from this software
  1213. without specific prior written permission.
  1214. This software is provided by the copyright holders and contributors "as
  1215. is" and any express or implied warranties, including, but not limited
  1216. to, the implied warranties of merchantability and fitness for a
  1217. particular purpose are disclaimed. In no event shall the copyright owner
  1218. or contributors be liable for any direct, indirect, incidental, special,
  1219. exemplary, or consequential damages (including, but not limited to,
  1220. procurement of substitute goods or services; loss of use, data, or
  1221. profits; or business interruption) however caused and on any theory of
  1222. liability, whether in contract, strict liability, or tort (including
  1223. negligence or otherwise) arising in any way out of the use of this
  1224. software, even if advised of the possibility of such damage.
  1225. =cut