inline.go

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package markdown
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"strings"
    11  	"unicode"
    12  	"unicode/utf8"
    13  )
    14  
    15  /*
    16  text node can be
    17  
    18   - other literal text
    19   - run of * or _ characters
    20   - [
    21   - ![
    22  
    23  keep delimiter stack pointing at non-other literal text
    24  each node contains
    25  
    26   - type of delimiter [ ![ _ *
    27   - number of delimiters
    28   - active or not
    29   - potential opener, potential closer, or both
    30  
    31  when a ] is hit, call look for link or image
    32  when end is hit, call process emphasis
    33  
    34  look for link or image:
    35  
    36  	find topmost [ or ![
    37  	if none, emit literal ]
    38  	if it's inactive, remove and emit literal ]
    39  	parse ahead to look for rest of link; if none, remove and emit literal ]
    40  	run process emphasis on the interior,
    41  	remove opener
    42  	if this was a link (not an image), set all [ before opener to inactive, to avoid links inside links
    43  
    44  process emphasis
    45  
    46  	walk forward in list to find a closer.
    47  	walk back to find first potential matching opener.
    48  	if found:
    49  		strong for length >= 2
    50  		insert node
    51  		drop delimiters between opener and closer
    52  		remove 1 or 2 from open/close count, removing if now empty
    53  		if closing has some left, go around again on this node
    54  	if not:
    55  		set openers bottom for this kind of element to before current_position
    56  		if the closer at current pos is not an opener, remove it
    57  
    58  seems needlessly complex. two passes
    59  
    60  scan and find ` ` first.
    61  
    62  pass 1. scan and find [ and ]() and leave the rest alone.
    63  
    64  each completed one invokes emphasis on inner text and then on the overall list.
    65  
    66  */
    67  
// An Inline is a node of inline Markdown content, such as a run of
// plain text, a code span, or an emphasized span.
type Inline interface {
	// PrintHTML writes the HTML rendering of the node to the buffer.
	PrintHTML(*bytes.Buffer)
	// PrintText writes the text rendering of the node to the buffer.
	PrintText(*bytes.Buffer)
	// printMarkdown writes the Markdown form of the node to the buffer.
	// Because the method is unexported, only types in this package
	// can implement Inline.
	printMarkdown(*bytes.Buffer)
}
    73  
// A Plain is a run of literal text.
type Plain struct {
	Text string
}

// Inline is a no-op marker method.
func (*Plain) Inline() {}

// PrintHTML writes x.Text to buf after passing it through htmlEscaper.
func (x *Plain) PrintHTML(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}

// printMarkdown writes x.Text to buf unchanged.
func (x *Plain) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Text)
}

// PrintText writes x.Text to buf after passing it through htmlEscaper,
// exactly as PrintHTML does.
func (x *Plain) PrintText(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}
    91  
// An openPlain is the literal text of a "[" or "![" that may still turn
// out to open a link or image. If it is never matched, emph converts it
// back to its embedded Plain.
type openPlain struct {
	Plain
	i int // position in input just past the bracket, where the link text starts
}
    96  
// An emphPlain is the literal text of a delimiter run found by parseEmph
// (a run of *, _ or ~, or a single quote character), together with the
// information emph needs to pair it with a matching run.
type emphPlain struct {
	Plain
	canOpen  bool // run may open a span
	canClose bool // run may close a span
	i        int  // position in output where emph is (index in emph's dst slice)
	n        int  // length of original span, before emph consumes any delimiters
}
   104  
// An Escaped is a Plain whose text was written with a backslash escape
// in the Markdown input (see parseEscape). It differs from Plain only in
// how it is printed back as Markdown.
type Escaped struct {
	Plain
}

// printMarkdown writes a backslash followed by the escaped text,
// restoring the escape that the parser removed.
func (x *Escaped) printMarkdown(buf *bytes.Buffer) {
	buf.WriteByte('\\')
	x.Plain.printMarkdown(buf)
}
   113  
// A Code is an inline code span. Text is the span's content
// with the surrounding backticks removed.
type Code struct {
	Text string
}

// Inline is a no-op marker method.
func (*Code) Inline() {}

// PrintHTML writes x.Text to buf, escaped by htmlEscaper and
// wrapped in a <code> element.
func (x *Code) PrintHTML(buf *bytes.Buffer) {
	fmt.Fprintf(buf, "<code>%s</code>", htmlEscaper.Replace(x.Text))
}
   123  
   124  func (x *Code) printMarkdown(buf *bytes.Buffer) {
   125  	if len(x.Text) == 0 {
   126  		return
   127  	}
   128  	// Use the fewest backticks we can, and add spaces as needed.
   129  	ticks := strings.Repeat("`", longestSequence(x.Text, '`')+1)
   130  	buf.WriteString(ticks)
   131  	if x.Text[0] == '`' {
   132  		buf.WriteByte(' ')
   133  	}
   134  	buf.WriteString(x.Text)
   135  	if x.Text[len(x.Text)-1] == '`' {
   136  		buf.WriteByte(' ')
   137  	}
   138  	buf.WriteString(ticks)
   139  }
   140  
   141  // longestSequence returns the length of the longest sequence of consecutive bytes b in s.
   142  func longestSequence(s string, b byte) int {
   143  	max := 0
   144  	cur := 0
   145  	for i := range s {
   146  		if s[i] == b {
   147  			cur++
   148  		} else {
   149  			if cur > max {
   150  				max = cur
   151  			}
   152  			cur = 0
   153  		}
   154  	}
   155  	if cur > max {
   156  		max = cur
   157  	}
   158  	return max
   159  }
   160  
// PrintText writes x.Text to buf after passing it through htmlEscaper,
// without the <code> wrapper that PrintHTML adds.
func (x *Code) PrintText(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}
   164  
   165  type Strong struct {
   166  	Marker string
   167  	Inner  []Inline
   168  }
   169  
   170  func (x *Strong) Inline() {
   171  }
   172  
   173  func (x *Strong) PrintHTML(buf *bytes.Buffer) {
   174  	buf.WriteString("<strong>")
   175  	for _, c := range x.Inner {
   176  		c.PrintHTML(buf)
   177  	}
   178  	buf.WriteString("</strong>")
   179  }
   180  
   181  func (x *Strong) printMarkdown(buf *bytes.Buffer) {
   182  	buf.WriteString(x.Marker)
   183  	for _, c := range x.Inner {
   184  		c.printMarkdown(buf)
   185  	}
   186  	buf.WriteString(x.Marker)
   187  }
   188  
   189  func (x *Strong) PrintText(buf *bytes.Buffer) {
   190  	for _, c := range x.Inner {
   191  		c.PrintText(buf)
   192  	}
   193  }
   194  
   195  type Del struct {
   196  	Marker string
   197  	Inner  []Inline
   198  }
   199  
   200  func (x *Del) Inline() {
   201  
   202  }
   203  
   204  func (x *Del) PrintHTML(buf *bytes.Buffer) {
   205  	buf.WriteString("<del>")
   206  	for _, c := range x.Inner {
   207  		c.PrintHTML(buf)
   208  	}
   209  	buf.WriteString("</del>")
   210  }
   211  
   212  func (x *Del) printMarkdown(buf *bytes.Buffer) {
   213  	buf.WriteString(x.Marker)
   214  	for _, c := range x.Inner {
   215  		c.printMarkdown(buf)
   216  	}
   217  	buf.WriteString(x.Marker)
   218  }
   219  
   220  func (x *Del) PrintText(buf *bytes.Buffer) {
   221  	for _, c := range x.Inner {
   222  		c.PrintText(buf)
   223  	}
   224  }
   225  
   226  type Emph struct {
   227  	Marker string
   228  	Inner  []Inline
   229  }
   230  
   231  func (*Emph) Inline() {}
   232  
   233  func (x *Emph) PrintHTML(buf *bytes.Buffer) {
   234  	buf.WriteString("<em>")
   235  	for _, c := range x.Inner {
   236  		c.PrintHTML(buf)
   237  	}
   238  	buf.WriteString("</em>")
   239  }
   240  
   241  func (x *Emph) printMarkdown(buf *bytes.Buffer) {
   242  	buf.WriteString(x.Marker)
   243  	for _, c := range x.Inner {
   244  		c.printMarkdown(buf)
   245  	}
   246  	buf.WriteString(x.Marker)
   247  }
   248  
   249  func (x *Emph) PrintText(buf *bytes.Buffer) {
   250  	for _, c := range x.Inner {
   251  		c.PrintText(buf)
   252  	}
   253  }
   254  
   255  func (p *parseState) emit(i int) {
   256  	if p.emitted < i {
   257  		p.list = append(p.list, &Plain{p.s[p.emitted:i]})
   258  		p.emitted = i
   259  	}
   260  }
   261  
// skip sets p.emitted to i without appending anything to p.list,
// so that a later emit will not output the input before i.
func (p *parseState) skip(i int) {
	p.emitted = i
}
   265  
// inline parses s as inline Markdown content and returns the resulting
// list of Inline nodes. It resets and uses p.s, p.list and p.emitted as
// scratch state, and may set p.corner.
//
// The scan is a single left-to-right pass. At each byte a parser is
// chosen by the leading character; when it succeeds, the text before it
// is emitted as Plain and its result is appended to p.list. Link and
// image openers are remembered in opens and resolved when a ']' is seen.
// Emphasis delimiters are left in the list as emphPlain nodes and
// resolved afterwards by emph: once for the inside of each completed
// link, and once for the whole list at the end.
func (p *parseState) inline(s string) []Inline {
	s = trimSpaceTab(s)
	// Scan text looking for inlines.
	// Leaf inlines are converted immediately.
	// Non-leaf inlines have potential starts pushed on a stack while we await completion.
	// Links take priority over other emphasis, so the emphasis must be delayed.
	p.s = s
	p.list = nil
	p.emitted = 0
	var opens []int // indexes of open ![ and [ Plains in p.list
	// lastLinkOpen is the open.i of the most recently completed link
	// (not image); a '[' opener positioned before it may no longer form a link.
	var lastLinkOpen int
	backticks := false // whether p.backticks has been reset for this call
	i := 0
	for i < len(s) {
		// Pick a candidate parser from the current byte.
		// parser stays nil for ordinary text and for disabled extensions.
		var parser func(*parseState, string, int) (Inline, int, int, bool)
		switch s[i] {
		case '\\':
			parser = parseEscape
		case '`':
			if !backticks {
				// Reset the backtick scanner lazily, on the first backtick seen.
				backticks = true
				p.backticks.reset()
			}
			parser = p.backticks.parseCodeSpan
		case '<':
			parser = parseAutoLinkOrHTML
		case '[':
			parser = parseLinkOpen
		case '!':
			parser = parseImageOpen
		case '_', '*':
			parser = parseEmph
		case '.':
			if p.SmartDot {
				parser = parseDot
			}
		case '-':
			if p.SmartDash {
				parser = parseDash
			}
		case '"', '\'':
			if p.SmartQuote {
				parser = parseEmph
			}
		case '~':
			if p.Strikethrough {
				parser = parseEmph
			}
		case '\n': // TODO what about eof
			parser = parseBreak
		case '&':
			parser = parseHTMLEntity
		case ':':
			if p.Emoji {
				parser = parseEmoji
			}
		}
		if parser != nil {
			if x, start, end, ok := parser(p, s, i); ok {
				// Flush the plain text before the match, then record the match.
				p.emit(start)
				if _, ok := x.(*openPlain); ok {
					// Remember where the opener will sit in p.list.
					opens = append(opens, len(p.list))
				}
				p.list = append(p.list, x)
				i = end
				p.skip(i)
				continue
			}
		}
		if s[i] == ']' && len(opens) > 0 {
			// Pop the most recent opener; it is consumed whether or not
			// a link results, and stays in p.list as text if none does.
			oi := opens[len(opens)-1]
			open := p.list[oi].(*openPlain)
			opens = opens[:len(opens)-1]
			// Images are always allowed; a link is allowed only if its
			// opener is not before an already completed link.
			if open.Text[0] == '!' || lastLinkOpen <= open.i {
				if x, end, ok := p.parseLinkClose(s, i, open); ok {
					p.corner = p.corner || x.corner || linkCorner(x.URL)
					p.emit(i)
					// Everything after the opener becomes the link's content,
					// with its emphasis resolved independently of the outside.
					x.Inner = p.emph(nil, p.list[oi+1:])
					if open.Text[0] == '!' {
						p.list[oi] = (*Image)(x)
					} else {
						p.list[oi] = x
					}
					// The link replaces the opener; drop the nodes it absorbed.
					p.list = p.list[:oi+1]
					p.skip(end)
					i = end
					if open.Text[0] == '[' {
						// No links around links.
						lastLinkOpen = open.i
					}
					continue
				}
			}
		}
		i++
	}
	p.emit(len(s))
	// Resolve the remaining emphasis in place (reusing p.list's storage),
	// then merge adjacent plain text and apply autoLinkText.
	p.list = p.emph(p.list[:0], p.list)
	p.list = p.mergePlain(p.list)
	p.list = p.autoLinkText(p.list)

	return p.list
}
   369  
// emph resolves the delimiter runs in src and appends the result to dst,
// returning the extended slice.
//
// Nodes other than *openPlain and *emphPlain are copied unchanged.
// An unused *openPlain is replaced by its Plain text. Each *emphPlain is
// matched, when it can close, against earlier runs of the same character
// that can open. Matches on * and _ produce Emph or Strong, matches on ~
// produce Del, and matches on quote characters rewrite both ends to
// curly quotes. Unmatched runs are left as text.
//
// dst may share its backing array with src (inline calls it that way):
// every src element is read before dst can grow past it.
func (ps *parseState) emph(dst, src []Inline) []Inline {
	// One stack of pending openers per delimiter character.
	const chars = "_*~\"'"
	var stack [len(chars)][]*emphPlain
	stackOf := func(c byte) int {
		return strings.IndexByte(chars, c)
	}

	// trimStack drops, from every stack, openers whose position is no
	// longer inside dst because dst was truncated by a match.
	trimStack := func() {
		for i := range stack {
			stk := &stack[i]
			for len(*stk) > 0 && (*stk)[len(*stk)-1].i >= len(dst) {
				*stk = (*stk)[:len(*stk)-1]
			}
		}
	}

Src:
	for i := 0; i < len(src); i++ {
		if open, ok := src[i].(*openPlain); ok {
			// Convert unused link/image open marker to plain text.
			dst = append(dst, &open.Plain)
			continue
		}
		p, ok := src[i].(*emphPlain)
		if !ok {
			dst = append(dst, src[i])
			continue
		}
		if p.canClose {
			stk := &stack[stackOf(p.Text[0])]
		Loop:
			// Keep matching while the closer still has delimiters left.
			for p.Text != "" {
				// Looking for same symbol and compatible with p.Text.
				// Search from the most recent opener backward.
				for i := len(*stk) - 1; i >= 0; i-- {
					start := (*stk)[i]
					// For * and _: if either run can both open and close,
					// skip the pair when the sum of the original lengths is a
					// multiple of 3, unless both lengths are multiples of 3.
					if (p.Text[0] == '*' || p.Text[0] == '_') && (p.canOpen && p.canClose || start.canOpen && start.canClose) && (p.n+start.n)%3 == 0 && (p.n%3 != 0 || start.n%3 != 0) {
						continue
					}
					if p.Text[0] == '~' && len(p.Text) != len(start.Text) { // ~ matches ~, ~~ matches ~~
						continue
					}
					if p.Text[0] == '"' {
						// Matched double quotes: curl both ends, keep the content in place.
						dst[start.i].(*emphPlain).Text = "“"
						p.Text = "”"
						dst = append(dst, p)
						*stk = (*stk)[:i]
						// no trimStack
						continue Src
					}
					if p.Text[0] == '\'' {
						// Matched single quotes: curl both ends, keep the content in place.
						dst[start.i].(*emphPlain).Text = "‘"
						p.Text = "’"
						dst = append(dst, p)
						*stk = (*stk)[:i]
						// no trimStack
						continue Src
					}
					// d is the number of delimiters consumed from each side.
					var d int
					if len(p.Text) >= 2 && len(start.Text) >= 2 {
						// strong
						d = 2
					} else {
						// emph
						d = 1
					}
					del := p.Text[0] == '~'
					// Copy the content out of dst, since dst is truncated below.
					x := &Emph{Marker: p.Text[:d], Inner: append([]Inline(nil), dst[start.i+1:]...)}
					start.Text = start.Text[:len(start.Text)-d]
					p.Text = p.Text[d:]
					if start.Text == "" {
						// Opener fully consumed: remove it along with the content.
						dst = dst[:start.i]
					} else {
						// Opener has delimiters left: keep it, remove only the content.
						dst = dst[:start.i+1]
					}
					trimStack()
					// Emph, Strong and Del share a layout, so the node is
					// built once and converted to the right type.
					if del {
						dst = append(dst, (*Del)(x))
					} else if d == 2 {
						dst = append(dst, (*Strong)(x))
					} else {
						dst = append(dst, x)
					}
					continue Loop
				}
				// No compatible opener was found.
				break
			}
		}
		if p.Text != "" {
			// Delimiters remain unmatched: emit them as text, and record
			// the run as a potential opener if it can open.
			stk := &stack[stackOf(p.Text[0])]
			if p.Text == "'" {
				p.Text = "’"
			}
			if p.Text == "\"" {
				if p.canClose {
					p.Text = "”"
				} else {
					p.Text = "“"
				}
			}
			if p.canOpen {
				p.i = len(dst)
				dst = append(dst, p)
				*stk = append(*stk, p)
			} else {
				dst = append(dst, &p.Plain)
			}
		}
	}
	return dst
}
   480  
   481  func mdUnescape(s string) string {
   482  	if !strings.Contains(s, `\`) && !strings.Contains(s, `&`) {
   483  		return s
   484  	}
   485  	return mdUnescaper.Replace(s)
   486  }
   487  
   488  var mdUnescaper = func() *strings.Replacer {
   489  	var list = []string{
   490  		`\!`, `!`,
   491  		`\"`, `"`,
   492  		`\#`, `#`,
   493  		`\$`, `$`,
   494  		`\%`, `%`,
   495  		`\&`, `&`,
   496  		`\'`, `'`,
   497  		`\(`, `(`,
   498  		`\)`, `)`,
   499  		`\*`, `*`,
   500  		`\+`, `+`,
   501  		`\,`, `,`,
   502  		`\-`, `-`,
   503  		`\.`, `.`,
   504  		`\/`, `/`,
   505  		`\:`, `:`,
   506  		`\;`, `;`,
   507  		`\<`, `<`,
   508  		`\=`, `=`,
   509  		`\>`, `>`,
   510  		`\?`, `?`,
   511  		`\@`, `@`,
   512  		`\[`, `[`,
   513  		`\\`, `\`,
   514  		`\]`, `]`,
   515  		`\^`, `^`,
   516  		`\_`, `_`,
   517  		"\\`", "`",
   518  		`\{`, `{`,
   519  		`\|`, `|`,
   520  		`\}`, `}`,
   521  		`\~`, `~`,
   522  	}
   523  
   524  	for name, repl := range htmlEntity {
   525  		list = append(list, name, repl)
   526  	}
   527  	return strings.NewReplacer(list...)
   528  }()
   529  
   530  func isPunct(c byte) bool {
   531  	return '!' <= c && c <= '/' || ':' <= c && c <= '@' || '[' <= c && c <= '`' || '{' <= c && c <= '~'
   532  }
   533  
   534  func parseEscape(p *parseState, s string, i int) (Inline, int, int, bool) {
   535  	if i+1 < len(s) {
   536  		c := s[i+1]
   537  		if isPunct(c) {
   538  			return &Escaped{Plain{s[i+1 : i+2]}}, i, i + 2, true
   539  		}
   540  		if c == '\n' { // TODO what about eof
   541  			if i > 0 && s[i-1] == '\\' {
   542  				p.corner = true // goldmark mishandles \\\ newline
   543  			}
   544  			end := i + 2
   545  			for end < len(s) && (s[end] == ' ' || s[end] == '\t') {
   546  				end++
   547  			}
   548  			return &HardBreak{}, i, end, true
   549  		}
   550  	}
   551  	return nil, 0, 0, false
   552  }
   553  
   554  func parseDot(p *parseState, s string, i int) (Inline, int, int, bool) {
   555  	if i+2 < len(s) && s[i+1] == '.' && s[i+2] == '.' {
   556  		return &Plain{"…"}, i, i + 3, true
   557  	}
   558  	return nil, 0, 0, false
   559  }
   560  
   561  func parseDash(p *parseState, s string, i int) (Inline, int, int, bool) {
   562  	if i+1 >= len(s) || s[i+1] != '-' {
   563  		return nil, 0, 0, false
   564  	}
   565  
   566  	n := 2
   567  	for i+n < len(s) && s[i+n] == '-' {
   568  		n++
   569  	}
   570  
   571  	// Mimic cmark-gfm. Can't make this stuff up.
   572  	em, en := 0, 0
   573  	switch {
   574  	case n%3 == 0:
   575  		em = n / 3
   576  	case n%2 == 0:
   577  		en = n / 2
   578  	case n%3 == 2:
   579  		em = (n - 2) / 3
   580  		en = 1
   581  	case n%3 == 1:
   582  		em = (n - 4) / 3
   583  		en = 2
   584  	}
   585  	return &Plain{strings.Repeat("—", em) + strings.Repeat("–", en)}, i, i + n, true
   586  }
   587  
// Inline code span markers must fit on punched cards, to match cmark-gfm.
// Backtick runs longer than maxBackticks never open a code span.
const maxBackticks = 80

// A backtickParser parses code spans and remembers what earlier scans
// saw, so that repeated failed searches for a closing backtick run do
// not rescan the input each time.
type backtickParser struct {
	// last[k-1] is the start offset of the most recent run of exactly
	// k backticks recorded by parseCodeSpan.
	last [maxBackticks]int
	// scanned reports whether some scan has reached the end of the
	// input without finding a match, after which last is used to
	// reject hopeless searches.
	scanned bool
}

// reset clears b so it can be used on a new input string.
func (b *backtickParser) reset() {
	*b = backtickParser{}
}
   599  
   600  func (b *backtickParser) parseCodeSpan(p *parseState, s string, i int) (Inline, int, int, bool) {
   601  	start := i
   602  	// Count leading backticks. Need to find that many again.
   603  	n := 1
   604  	for i+n < len(s) && s[i+n] == '`' {
   605  		n++
   606  	}
   607  
   608  	// If we've already scanned the whole string (for a different count),
   609  	// we can skip a failed scan by checking whether we saw this count.
   610  	// To enable this optimization, following cmark-gfm, we declare by fiat
   611  	// that more than maxBackticks backquotes is too many.
   612  	if n > len(b.last) || b.scanned && b.last[n-1] < i+n {
   613  		goto NoMatch
   614  	}
   615  
   616  	for end := i + n; end < len(s); {
   617  		if s[end] != '`' {
   618  			end++
   619  			continue
   620  		}
   621  		estart := end
   622  		for end < len(s) && s[end] == '`' {
   623  			end++
   624  		}
   625  		m := end - estart
   626  		if !b.scanned && m < len(b.last) {
   627  			b.last[m-1] = estart
   628  		}
   629  		if m == n {
   630  			// Match.
   631  			// Line endings are converted to single spaces.
   632  			text := s[i+n : estart]
   633  			text = strings.ReplaceAll(text, "\n", " ")
   634  
   635  			// If enclosed text starts and ends with a space and is not all spaces,
   636  			// one space is removed from start and end, to allow `` ` `` to quote a single backquote.
   637  			if len(text) >= 2 && text[0] == ' ' && text[len(text)-1] == ' ' && trimSpace(text) != "" {
   638  				text = text[1 : len(text)-1]
   639  			}
   640  
   641  			return &Code{text}, start, end, true
   642  		}
   643  	}
   644  	b.scanned = true
   645  
   646  NoMatch:
   647  	// No match, so none of these backticks count: skip them all.
   648  	// For example ``x` is not a single backtick followed by a code span.
   649  	// Returning nil, 0, false would advance to the second backtick and try again.
   650  	return &Plain{s[i : i+n]}, start, i + n, true
   651  }
   652  
// parseAutoLinkOrHTML handles the '<' at s[i]. It tries, in order,
// parseAutoLinkURI, parseAutoLinkEmail and parseHTMLTag, and returns
// the result of the first that succeeds, together with the input range
// from i to the end that parser reported. If none succeeds, ok is false
// and the '<' is left to be treated as ordinary text.
func parseAutoLinkOrHTML(p *parseState, s string, i int) (Inline, int, int, bool) {
	if x, end, ok := parseAutoLinkURI(s, i); ok {
		return x, i, end, true
	}
	if x, end, ok := parseAutoLinkEmail(s, i); ok {
		return x, i, end, true
	}
	if x, end, ok := parseHTMLTag(p, s, i); ok {
		return x, i, end, true
	}
	return nil, 0, 0, false
}
   665  
   666  func isLetter(c byte) bool {
   667  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
   668  }
   669  
   670  func isLDH(c byte) bool {
   671  	return isLetterDigit(c) || c == '-'
   672  }
   673  
   674  func isLetterDigit(c byte) bool {
   675  	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9'
   676  }
   677  
// parseLinkOpen records the "[" at s[i] as an openPlain, a potential
// link opener. The openPlain's i field is set to i+1, the offset just
// past the bracket. ok is always true.
func parseLinkOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
	return &openPlain{Plain{s[i : i+1]}, i + 1}, i, i + 1, true
}
   681  
   682  func parseImageOpen(_ *parseState, s string, i int) (Inline, int, int, bool) {
   683  	if i+1 < len(s) && s[i+1] == '[' {
   684  		return &openPlain{Plain{s[i : i+2]}, i + 2}, i, i + 2, true
   685  	}
   686  	return nil, 0, 0, false
   687  }
   688  
// parseEmph scans the delimiter run starting at s[i] and returns it as
// an emphPlain recording whether the run can open and/or close a span.
// The pairing of runs is done later, by emph.
//
// For *, _ and ~ the run is the maximal sequence of the same character;
// for any other character (the quote characters, as used by inline) it
// is that single character. A run of more than two ~ is returned as a
// Plain instead, and any ~ run whose length is not 2 sets p.corner.
//
// ok is always true.
func parseEmph(p *parseState, s string, i int) (Inline, int, int, bool) {
	c := s[i]
	j := i + 1 // end of the delimiter run
	if c == '*' || c == '~' || c == '_' {
		for j < len(s) && s[j] == c {
			j++
		}
	}
	if c == '~' && j-i != 2 {
		// Goldmark does not accept ~text~
		// and incorrectly accepts ~~~text~~~.
		// Only ~~ is correct.
		p.corner = true
	}
	if c == '~' && j-i > 2 {
		return &Plain{s[i:j]}, i, j, true
	}

	// Find the runes on either side of the run.
	// The start and end of the input count as spaces.
	var before, after rune
	if i == 0 {
		before = ' '
	} else {
		before, _ = utf8.DecodeLastRuneInString(s[:i])
	}
	if j >= len(s) {
		after = ' '
	} else {
		after, _ = utf8.DecodeRuneInString(s[j:])
	}

	// “A left-flanking delimiter run is a delimiter run that is
	// (1) not followed by Unicode whitespace, and either
	// (2a) not followed by a Unicode punctuation character, or
	// (2b) followed by a Unicode punctuation character
	// and preceded by Unicode whitespace or a Unicode punctuation character.
	// For purposes of this definition, the beginning and the end
	// of the line count as Unicode whitespace.”
	leftFlank := !isUnicodeSpace(after) &&
		(!isUnicodePunct(after) || isUnicodeSpace(before) || isUnicodePunct(before))

	// “A right-flanking delimiter run is a delimiter run that is
	// (1) not preceded by Unicode whitespace, and either
	// (2a) not preceded by a Unicode punctuation character, or
	// (2b) preceded by a Unicode punctuation character
	// and followed by Unicode whitespace or a Unicode punctuation character.
	// For purposes of this definition, the beginning and the end
	// of the line count as Unicode whitespace.”
	rightFlank := !isUnicodeSpace(before) &&
		(!isUnicodePunct(before) || isUnicodeSpace(after) || isUnicodePunct(after))

	var canOpen, canClose bool

	switch c {
	case '\'', '"':
		// A quote opens only when it is strictly left-flanking and does
		// not directly follow a closing bracket or parenthesis.
		canOpen = leftFlank && !rightFlank && before != ']' && before != ')'
		canClose = rightFlank
	case '*', '~':
		// “A single * character can open emphasis iff
		// it is part of a left-flanking delimiter run.”

		// “A double ** can open strong emphasis iff
		// it is part of a left-flanking delimiter run.”
		canOpen = leftFlank

		// “A single * character can close emphasis iff
		// it is part of a right-flanking delimiter run.”

		// “A double ** can close strong emphasis iff
		// it is part of a right-flanking delimiter run.”
		canClose = rightFlank
	case '_':
		// “A single _ character can open emphasis iff
		// it is part of a left-flanking delimiter run and either
		// (a) not part of a right-flanking delimiter run or
		// (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.”

		// “A double __ can open strong emphasis iff
		// it is part of a left-flanking delimiter run and either
		// (a) not part of a right-flanking delimiter run or
		// (b) part of a right-flanking delimiter run preceded by a Unicode punctuation character.”
		canOpen = leftFlank && (!rightFlank || isUnicodePunct(before))

		// “A single _ character can close emphasis iff
		// it is part of a right-flanking delimiter run and either
		// (a) not part of a left-flanking delimiter run or
		// (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.”

		// “A double __ can close strong emphasis iff
		// it is part of a right-flanking delimiter run and either
		// (a) not part of a left-flanking delimiter run or
		// (b) part of a left-flanking delimiter run followed by a Unicode punctuation character.”
		canClose = rightFlank && (!leftFlank || isUnicodePunct(after))
	}

	return &emphPlain{Plain: Plain{s[i:j]}, canOpen: canOpen, canClose: canClose, n: j - i}, i, j, true
}
   785  
   786  func isUnicodeSpace(r rune) bool {
   787  	if r < 0x80 {
   788  		return r == ' ' || r == '\t' || r == '\f' || r == '\n'
   789  	}
   790  	return unicode.In(r, unicode.Zs)
   791  }
   792  
   793  func isUnicodePunct(r rune) bool {
   794  	if r < 0x80 {
   795  		return isPunct(byte(r))
   796  	}
   797  	return unicode.In(r, unicode.Punct)
   798  }
   799  
// parseLinkClose tries to complete a link or image at the ']' at s[i],
// whose opening bracket is described by open.
//
// It tries, in order:
//   - an inline link, "](dest title)"; if that is malformed it falls
//     through to the reference forms below;
//   - a full reference, "][label]"; if the label is syntactically valid
//     but not defined in p.links, the attempt fails with no fallback;
//   - a collapsed ("[]" follows) or shortcut reference, using the text
//     between the brackets as the label.
//
// On success it returns the Link (inline converts it to an Image when
// the opener was "![") and the offset just past the consumed input.
// The Link's Inner is not set here; the caller fills it in.
func (p *parseState) parseLinkClose(s string, i int, open *openPlain) (*Link, int, bool) {
	if i+1 < len(s) {
		switch s[i+1] {
		case '(':
			// Inline link - [Text](Dest Title), with Title omitted or both Dest and Title omitted.
			// This i shadows the parameter, so a break out of the switch
			// resumes below with i still at the ']'.
			i := skipSpace(s, i+2)
			var dest, title string
			var titleChar byte
			var corner bool
			if i < len(s) && s[i] != ')' {
				var ok bool
				dest, i, ok = parseLinkDest(s, i)
				if !ok {
					break
				}
				i = skipSpace(s, i)
				if i < len(s) && s[i] != ')' {
					title, titleChar, i, ok = parseLinkTitle(s, i)
					if title == "" {
						corner = true
					}
					if !ok {
						break
					}
					i = skipSpace(s, i)
				}
			}
			if i < len(s) && s[i] == ')' {
				return &Link{URL: dest, Title: title, TitleChar: titleChar, corner: corner}, i + 1, true
			}
			// NOTE: Test malformed ( ) with shortcut reference
			// TODO fall back on syntax error?

		case '[':
			// Full reference link - [Text][Label]
			label, i, ok := parseLinkLabel(p, s, i+1)
			if !ok {
				break
			}
			if link, ok := p.links[normalizeLabel(label)]; ok {
				return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, i, true
			}
			// Note: Could break here, but CommonMark dingus does not
			// fall back to trying Text for [Text][Label] when Label is unknown.
			// Unclear from spec what the correct answer is.
			return nil, 0, false
		}
	}

	// Collapsed or shortcut reference link: [Text][] or [Text].
	end := i + 1
	if strings.HasPrefix(s[end:], "[]") {
		end += 2
	}

	// open.i is the offset just past the opening bracket, so s[open.i:i]
	// is the raw text between the brackets.
	if link, ok := p.links[normalizeLabel(s[open.i:i])]; ok {
		return &Link{URL: link.URL, Title: link.Title, corner: link.corner}, end, true
	}
	return nil, 0, false
}
   860  
   861  func skipSpace(s string, i int) int {
   862  	// Note: Blank lines have already been removed.
   863  	for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') {
   864  		i++
   865  	}
   866  	return i
   867  }
   868  
   869  func linkCorner(url string) bool {
   870  	for i := 0; i < len(url); i++ {
   871  		if url[i] == '%' {
   872  			if i+2 >= len(url) || !isHexDigit(url[i+1]) || !isHexDigit(url[i+2]) {
   873  				// Goldmark and the Dingus re-escape such percents as %25,
   874  				// but the spec does not seem to require this behavior.
   875  				return true
   876  			}
   877  		}
   878  	}
   879  	return false
   880  }
   881  
   882  func (p *parseState) mergePlain(list []Inline) []Inline {
   883  	out := list[:0]
   884  	start := 0
   885  	for i := 0; ; i++ {
   886  		if i < len(list) && toPlain(list[i]) != nil {
   887  			continue
   888  		}
   889  		// Non-Plain or end of list.
   890  		if start < i {
   891  			out = append(out, mergePlain1(list[start:i]))
   892  		}
   893  		if i >= len(list) {
   894  			break
   895  		}
   896  		out = append(out, list[i])
   897  		start = i + 1
   898  	}
   899  	return out
   900  }
   901  
   902  func toPlain(x Inline) *Plain {
   903  	// TODO what about Escaped?
   904  	switch x := x.(type) {
   905  	case *Plain:
   906  		return x
   907  	case *emphPlain:
   908  		return &x.Plain
   909  	case *openPlain:
   910  		return &x.Plain
   911  	}
   912  	return nil
   913  }
   914  
   915  func mergePlain1(list []Inline) *Plain {
   916  	if len(list) == 1 {
   917  		return toPlain(list[0])
   918  	}
   919  	var all []string
   920  	for _, pl := range list {
   921  		all = append(all, toPlain(pl).Text)
   922  	}
   923  	return &Plain{Text: strings.Join(all, "")}
   924  }
   925  
   926  func parseEmoji(p *parseState, s string, i int) (Inline, int, int, bool) {
   927  	for j := i + 1; ; j++ {
   928  		if j >= len(s) || j-i > 2+maxEmojiLen {
   929  			break
   930  		}
   931  		if s[j] == ':' {
   932  			name := s[i+1 : j]
   933  			if utf, ok := emoji[name]; ok {
   934  				return &Emoji{s[i : j+1], utf}, i, j + 1, true
   935  			}
   936  			break
   937  		}
   938  	}
   939  	return nil, 0, 0, false
   940  }
   941  
// An Emoji is an emoji written in the input as :name:.
type Emoji struct {
	Name string // emoji :name:, including colons
	Text string // Unicode for emoji sequence
}

// Inline is a no-op marker method.
func (*Emoji) Inline() {}

// PrintHTML writes the emoji's Unicode text to buf,
// passed through htmlEscaper.
func (x *Emoji) PrintHTML(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}

// printMarkdown writes the emoji's Unicode text to buf.
// NOTE(review): this writes Text, not the original :name: form;
// confirm that is intended.
func (x *Emoji) printMarkdown(buf *bytes.Buffer) {
	buf.WriteString(x.Text)
}

// PrintText writes the emoji's Unicode text to buf,
// passed through htmlEscaper.
func (x *Emoji) PrintText(buf *bytes.Buffer) {
	htmlEscaper.WriteString(buf, x.Text)
}
   960
View as plain text