1
2
3
4
5 package markdown
6
7 import (
8 "bytes"
9 "fmt"
10 "reflect"
11 "slices"
12 "strings"
13 )
14
15
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Block is the interface implemented by every block-level node of a
// parsed document.
47 type Block interface {
// Pos returns the source line range covered by the block.
48 Pos() Position
// PrintHTML appends the block's HTML rendering to buf.
49 PrintHTML(buf *bytes.Buffer)
// printMarkdown appends the block's Markdown rendering to buf, using the
// line prefixes carried in s.
50 printMarkdown(buf *bytes.Buffer, s mdState)
51 }
52
// mdState is the formatting context passed down while printing Markdown.
53 type mdState struct {
// prefix is written at the start of each output line (and on blank
// separator lines, with trailing spaces and tabs removed).
54 prefix string
// prefix1, when non-empty, replaces prefix on the first line printed;
// printMarkdownBlocks clears it after the first block.
55 prefix1 string
// bullet and num are not read in this part of the file.
// NOTE(review): presumably the active list marker and item number — confirm.
56 bullet rune
57 num int
58 }
59
// Position records the range of input lines a block spans.
// The parser increments its line counter before handling each line, so the
// first input line is line 1; the root block is opened at line 0.
60 type Position struct {
// StartLine is the line number on which the block was opened.
61 StartLine int
// EndLine is the last line number attributed to the block.
62 EndLine int
63 }
64
65 func (p Position) Pos() Position {
66 return p
67 }
68
// buildState is the view of the parser handed to a blockBuilder's build
// method. In this file it is implemented by *parseState.
69 type buildState interface {
// blocks returns the children collected so far for the innermost open block.
70 blocks() []Block
// pos returns the position of the innermost open block.
71 pos() Position
// last returns the most recently added child of the innermost open block.
72 last() Block
// deleteLast removes that most recently added child.
73 deleteLast()
74
// link looks up a previously defined link by label (nil if absent).
75 link(label string) *Link
// defineLink records link under label.
76 defineLink(label string, link *Link)
// newText creates a Text block whose inline content is parsed later.
77 newText(pos Position, text string) *Text
78 }
79
// blockBuilder is the per-block-kind logic for a block that is still open.
80 type blockBuilder interface {
// extend is called by addLine for each new input line. It returns the
// (possibly advanced) line and whether the block continues onto this line;
// addLine stops descending the stack at the first false.
81 extend(p *parseState, s line) (line, bool)
// build is called by closeBlock to produce the finished Block.
82 build(buildState) Block
83 }
84
// openBlock is one entry of the parser's stack of blocks still being built.
85 type openBlock struct {
// builder holds the block-specific extend/build logic.
86 builder blockBuilder
// inner accumulates the finished child blocks.
87 inner []Block
// pos is the line range seen so far; addBlock sets both ends to the
// current line and addLine advances EndLine.
88 pos Position
89 }
90
// itemBuilder is the builder state for a list item. None of its methods are
// defined in this part of the file.
91 type itemBuilder struct {
// list is the builder of the enclosing list.
92 list *listBuilder
// NOTE(review): width is presumably the indentation required for
// continuation lines of the item — confirm against the list code.
93 width int
// NOTE(review): haveContent presumably records whether any non-blank
// content has been seen in the item — confirm.
94 haveContent bool
95 }
96
97 func (p *parseState) last() Block {
98 ob := &p.stack[len(p.stack)-1]
99 return ob.inner[len(ob.inner)-1]
100 }
101
102 func (p *parseState) deleteLast() {
103 ob := &p.stack[len(p.stack)-1]
104 ob.inner = ob.inner[:len(ob.inner)-1]
105 }
106
// Text is a block holding a run of inline content.
107 type Text struct {
// Position is the line range of the text; it also supplies Pos.
108 Position
// Inline is the parsed inline content; parse fills it in from raw after
// all lines have been processed.
109 Inline []Inline
// raw is the unparsed source text given to newText.
110 raw string
111 }
112
113 func (b *Text) PrintHTML(buf *bytes.Buffer) {
114 for _, x := range b.Inline {
115 x.PrintHTML(buf)
116 }
117 }
118
119 func (b *Text) printMarkdown(buf *bytes.Buffer, s mdState) {
120 if s.prefix1 != "" {
121 buf.WriteString(s.prefix1)
122 } else {
123 buf.WriteString(s.prefix)
124 }
125 var prev Inline
126 for _, x := range b.Inline {
127 switch prev.(type) {
128 case *SoftBreak, *HardBreak:
129 buf.WriteString(s.prefix)
130 }
131 x.printMarkdown(buf)
132 prev = x
133 }
134 buf.WriteByte('\n')
135 }
136
137 type rootBuilder struct{}
138
139 func (b *rootBuilder) build(p buildState) Block {
140 return &Document{p.pos(), p.blocks(), p.(*parseState).links}
141 }
142
// Document is the root Block produced by parsing.
143 type Document struct {
// Position is the line range of the whole document.
144 Position
// Blocks are the top-level blocks in source order.
145 Blocks []Block
// Links holds the link definitions by label; printMarkdown emits them,
// sorted by label, after the blocks. It is nil when none were defined.
146 Links map[string]*Link
147 }
148
149
150
151
152
// Parser holds the options that control parsing. The zero value is usable:
// Parse can be called on it with every option off.
// Only TaskListItems is consulted in this part of the file; the notes on the
// other fields are inferred from their names and need confirming.
153 type Parser struct {
154
155
156
157
158
159
// NOTE(review): presumably enables explicit heading ID syntax — confirm.
160 HeadingIDs bool
161
162
163
164
// NOTE(review): presumably enables strikethrough markup — confirm.
165 Strikethrough bool
166
167
168
169
170
// TaskListItems makes the parser collect every List it builds and run
// taskList over each one once parsing has finished.
171 TaskListItems bool
172
173
// NOTE(review): presumably control automatic linking of bare URLs and
// whether a missing scheme defaults to HTTP — confirm.
174 AutoLinkText bool
175 AutoLinkAssumeHTTP bool
176
177
// NOTE(review): presumably enables table syntax — confirm.
178 Table bool
179
180
// NOTE(review): presumably enables emoji shortcodes — confirm.
181 Emoji bool
182
183
// NOTE(review): presumably enable typographic replacement of dots, dashes
// and quotes — confirm.
184 SmartDot bool
185 SmartDash bool
186 SmartQuote bool
187 }
188
// parseState is the working state of a single parse call.
189 type parseState struct {
// The embedded Parser supplies the option flags.
190 *Parser
191
// root is set by closeBlock when the last block on the stack is closed.
192 root *Document
// links maps labels to link definitions; allocated lazily by defineLink.
193 links map[string]*Link
// lineno is the number of the line being processed (incremented before
// each call to addLine).
194 lineno int
// stack holds the currently open blocks, outermost first.
195 stack []openBlock
// lineDepth is the stack index of the deepest block that has accepted the
// current line so far; parse initialises it to -1 before opening the root.
196 lineDepth int
197
// corner is set when the input contained NUL bytes or listCorner reported
// true for a list; parse returns it alongside the document.
198 corner bool
199
200
// s, emitted and list are not used in this part of the file.
// NOTE(review): presumably scratch state for inline parsing — confirm.
201 s string
202 emitted int
203 list []Inline
204
205
// lists collects every List built while TaskListItems is enabled.
206 lists []*List
// texts collects every Text made by newText so parse can fill in Inline.
207 texts []*Text
208
// NOTE(review): presumably state for code-span (backtick) parsing — confirm.
209 backticks backtickParser
210 }
211
212 func (p *parseState) newText(pos Position, text string) *Text {
213 b := &Text{Position: pos, raw: text}
214 p.texts = append(p.texts, b)
215 return b
216 }
217
218 func (p *parseState) blocks() []Block {
219 b := &p.stack[len(p.stack)-1]
220 return b.inner
221 }
222
223 func (p *parseState) pos() Position {
224 b := &p.stack[len(p.stack)-1]
225 return b.pos
226 }
227
228 func (p *Parser) Parse(text string) *Document {
229 d, _ := p.parse(text)
230 return d
231 }
232
233 func (p *Parser) parse(text string) (d *Document, corner bool) {
234 var ps parseState
235 ps.Parser = p
236 if strings.Contains(text, "\x00") {
237 text = strings.ReplaceAll(text, "\x00", "\uFFFD")
238 ps.corner = true
239 }
240
241 ps.lineDepth = -1
242 ps.addBlock(&rootBuilder{})
243 for text != "" {
244 var ln string
245 i := strings.Index(text, "\n")
246 j := strings.Index(text, "\r")
247 var nl byte
248 switch {
249 case j >= 0 && (i < 0 || j < i):
250 ln = text[:j]
251 if i == j+1 {
252 text = text[j+2:]
253 nl = '\r' + '\n'
254 } else {
255 text = text[j+1:]
256 nl = '\r'
257 }
258 case i >= 0:
259 ln, text = text[:i], text[i+1:]
260 nl = '\n'
261 default:
262 ln, text = text, ""
263 }
264 ps.lineno++
265 ps.addLine(line{text: ln, nl: nl})
266 }
267 ps.trimStack(0)
268
269 for _, t := range ps.texts {
270 t.Inline = ps.inline(t.raw)
271 }
272
273 if p.TaskListItems {
274 for _, list := range ps.lists {
275 ps.taskList(list)
276 }
277 }
278
279 return ps.root, ps.corner
280 }
281
282 func (p *parseState) curB() blockBuilder {
283 if p.lineDepth < len(p.stack) {
284 return p.stack[p.lineDepth].builder
285 }
286 return nil
287 }
288
289 func (p *parseState) nextB() blockBuilder {
290 if p.lineDepth+1 < len(p.stack) {
291 return p.stack[p.lineDepth+1].builder
292 }
293 return nil
294 }
295 func (p *parseState) trimStack(depth int) {
296 if len(p.stack) < depth {
297 panic("trimStack")
298 }
299 for len(p.stack) > depth {
300 p.closeBlock()
301 }
302 }
303
304 func (p *parseState) addBlock(c blockBuilder) {
305 p.trimStack(p.lineDepth + 1)
306 p.stack = append(p.stack, openBlock{})
307 ob := &p.stack[len(p.stack)-1]
308 ob.builder = c
309 ob.pos.StartLine = p.lineno
310 ob.pos.EndLine = p.lineno
311 }
312
313 func (p *parseState) doneBlock(b Block) {
314 p.trimStack(p.lineDepth + 1)
315 ob := &p.stack[len(p.stack)-1]
316 ob.inner = append(ob.inner, b)
317 }
318
319 func (p *parseState) para() *paraBuilder {
320 if b, ok := p.stack[len(p.stack)-1].builder.(*paraBuilder); ok {
321 return b
322 }
323 return nil
324 }
325
326 func (p *parseState) closeBlock() Block {
327 b := &p.stack[len(p.stack)-1]
328 if b.builder == nil {
329 println("closeBlock", len(p.stack)-1)
330 }
331 blk := b.builder.build(p)
332 if list, ok := blk.(*List); ok {
333 p.corner = p.corner || listCorner(list)
334 if p.TaskListItems {
335 p.lists = append(p.lists, list)
336 }
337 }
338 p.stack = p.stack[:len(p.stack)-1]
339 if len(p.stack) > 0 {
340 b := &p.stack[len(p.stack)-1]
341 b.inner = append(b.inner, blk)
342
343 } else {
344 p.root = blk.(*Document)
345 }
346 return blk
347 }
348
349 func (p *parseState) link(label string) *Link {
350 return p.links[label]
351 }
352
353 func (p *parseState) defineLink(label string, link *Link) {
354 if p.links == nil {
355 p.links = make(map[string]*Link)
356 }
357 p.links[label] = link
358 }
359
// line is a cursor over one input line. Tabs are expanded lazily: when
// trimSpace consumes a tab as one column, the columns that remain up to the
// next multiple-of-4 tab stop are kept in spaces.
360 type line struct {
// spaces is the number of pending virtual spaces that logically precede
// text[i:].
361 spaces int
// i is the byte offset in text of the next unread character.
362 i int
// tab is the offset just past the most recently consumed tab; trimSpace
// uses it to work out how wide the next tab is.
363 tab int
// text is the line content without its terminator.
364 text string
// nl identifies the terminator as set by parse: '\n', '\r', '\r'+'\n'
// for CRLF, or 0 when the line had none.
365 nl byte
366 }
367
368 func (p *parseState) addLine(s line) {
369
370 p.lineDepth = 0
371 for ; p.lineDepth+1 < len(p.stack); p.lineDepth++ {
372 old := s
373 var ok bool
374 s, ok = p.stack[p.lineDepth+1].builder.extend(p, s)
375 if !old.isBlank() && (ok || s != old) {
376 p.stack[p.lineDepth+1].pos.EndLine = p.lineno
377 }
378 if !ok {
379 break
380 }
381 }
382
383 if s.isBlank() {
384 p.trimStack(p.lineDepth + 1)
385 return
386 }
387
388
389 Prefixes:
390
391 for _, fn := range news {
392 if l, ok := fn(p, s); ok {
393 s = l
394 if s.isBlank() {
395 return
396 }
397 p.lineDepth++
398 goto Prefixes
399 }
400 }
401
402 newPara(p, s)
403 }
404
405 func (c *rootBuilder) extend(p *parseState, s line) (line, bool) {
406 panic("root extend")
407 }
408
// news lists the block starters that addLine tries, in this order, on the
// part of a line left over after the open blocks have been extended. Each
// one returns the remaining line and whether it matched; after a match
// addLine starts again from the first entry.
409 var news = []func(*parseState, line) (line, bool){
410 newQuote,
411 newATXHeading,
412 newSetextHeading,
413 newHR,
414 newListItem,
415 newHTML,
416 newFence,
417 newPre,
418 }
419
420 func (s *line) peek() byte {
421 if s.spaces > 0 {
422 return ' '
423 }
424 if s.i >= len(s.text) {
425 return 0
426 }
427 return s.text[s.i]
428 }
429
430 func (s *line) skipSpace() {
431 s.spaces = 0
432 for s.i < len(s.text) && (s.text[s.i] == ' ' || s.text[s.i] == '\t') {
433 s.i++
434 }
435 }
436
437 func (s *line) trimSpace(min, max int, eolOK bool) bool {
438 t := *s
439 for n := 0; n < max; n++ {
440 if t.spaces > 0 {
441 t.spaces--
442 continue
443 }
444 if t.i >= len(t.text) && eolOK {
445 continue
446 }
447 if t.i < len(t.text) {
448 switch t.text[t.i] {
449 case '\t':
450 t.spaces = 4 - (t.i-t.tab)&3 - 1
451 t.i++
452 t.tab = t.i
453 continue
454 case ' ':
455 t.i++
456 continue
457 }
458 }
459 if n >= min {
460 break
461 }
462 return false
463 }
464 *s = t
465 return true
466 }
467
468 func (s *line) trim(c byte) bool {
469 if s.spaces > 0 {
470 if c == ' ' {
471 s.spaces--
472 return true
473 }
474 return false
475 }
476 if s.i < len(s.text) && s.text[s.i] == c {
477 s.i++
478 return true
479 }
480 return false
481 }
482
483 func (s *line) string() string {
484 switch s.spaces {
485 case 0:
486 return s.text[s.i:]
487 case 1:
488 return " " + s.text[s.i:]
489 case 2:
490 return " " + s.text[s.i:]
491 case 3:
492 return " " + s.text[s.i:]
493 }
494 panic("bad spaces")
495 }
496
// trimLeftSpaceTab returns s without its leading spaces and tabs.
func trimLeftSpaceTab(s string) string {
	return strings.TrimLeft(s, " \t")
}
504
// trimRightSpaceTab returns s without its trailing spaces and tabs.
func trimRightSpaceTab(s string) string {
	return strings.TrimRight(s, " \t")
}
512
// trimSpaceTab returns s without leading and trailing spaces and tabs.
func trimSpaceTab(s string) string {
	return strings.Trim(s, " \t")
}
525
// trimSpace returns s without leading and trailing spaces and tabs.
// Other white space, including newlines, is left in place.
func trimSpace(s string) string {
	return strings.Trim(s, " \t")
}
538
// trimSpaceTabNewline returns s without leading and trailing spaces, tabs
// and newline ('\n') characters. Carriage returns are not trimmed.
func trimSpaceTabNewline(s string) string {
	return strings.Trim(s, " \t\n")
}
551
552 func (s *line) isBlank() bool {
553 return trimLeftSpaceTab(s.text[s.i:]) == ""
554 }
555
556 func (s *line) eof() bool {
557 return s.i >= len(s.text)
558 }
559
560 func (s *line) trimSpaceString() string {
561 return trimLeftSpaceTab(s.text[s.i:])
562 }
563
564 func (s *line) trimString() string {
565 return trimSpaceTab(s.text[s.i:])
566 }
567
568 func ToHTML(b Block) string {
569 var buf bytes.Buffer
570 b.PrintHTML(&buf)
571 return buf.String()
572 }
573
574 func ToMarkdown(b Block) string {
575 var buf bytes.Buffer
576 b.printMarkdown(&buf, mdState{})
577 s := buf.String()
578
579 if strings.HasSuffix(s, "\n\n") {
580 s = s[:len(s)-1]
581 }
582 return s
583 }
584
585 func (b *Document) PrintHTML(buf *bytes.Buffer) {
586 for _, c := range b.Blocks {
587 c.PrintHTML(buf)
588 }
589 }
590
591 func (b *Document) printMarkdown(buf *bytes.Buffer, s mdState) {
592 printMarkdownBlocks(b.Blocks, buf, s)
593
594 var keys []string
595 for k := range b.Links {
596 keys = append(keys, k)
597 }
598 slices.Sort(keys)
599 for _, k := range keys {
600 l := b.Links[k]
601 fmt.Fprintf(buf, "[%s]: %s", k, l.URL)
602 printLinkTitleMarkdown(buf, l.Title, l.TitleChar)
603 buf.WriteByte('\n')
604 }
605 }
606
607 func printMarkdownBlocks(bs []Block, buf *bytes.Buffer, s mdState) {
608 prevEnd := 0
609 for _, b := range bs {
610
611 if prevEnd > 0 {
612 for i := prevEnd + 1; i < b.Pos().StartLine; i++ {
613 buf.WriteString(trimRightSpaceTab(s.prefix))
614 buf.WriteByte('\n')
615 }
616 }
617 b.printMarkdown(buf, s)
618 prevEnd = b.Pos().EndLine
619 s.prefix1 = ""
620 }
621 }
622
623 var (
624 blockType = reflect.TypeOf(new(Block)).Elem()
625 blocksType = reflect.TypeOf(new([]Block)).Elem()
626 inlinesType = reflect.TypeOf(new([]Inline)).Elem()
627 )
628
629 func printb(buf *bytes.Buffer, b Block, prefix string) {
630 fmt.Fprintf(buf, "(%T", b)
631 v := reflect.ValueOf(b)
632 v = reflect.Indirect(v)
633 if v.Kind() != reflect.Struct {
634 fmt.Fprintf(buf, " %v", b)
635 }
636 t := v.Type()
637 for i := 0; i < t.NumField(); i++ {
638 tf := t.Field(i)
639 if !tf.IsExported() {
640 continue
641 }
642 if tf.Type == inlinesType {
643 printis(buf, v.Field(i).Interface().([]Inline))
644 } else if tf.Type.Kind() == reflect.Slice && tf.Type.Elem().Kind() == reflect.String {
645 fmt.Fprintf(buf, " %s:%q", tf.Name, v.Field(i))
646 } else if tf.Type != blocksType && !tf.Type.Implements(blockType) && tf.Type.Kind() != reflect.Slice {
647 fmt.Fprintf(buf, " %s:%v", tf.Name, v.Field(i))
648 }
649 }
650
651 prefix += "\t"
652 for i := 0; i < t.NumField(); i++ {
653 tf := t.Field(i)
654 if !tf.IsExported() {
655 continue
656 }
657 if tf.Type.Implements(blockType) {
658 fmt.Fprintf(buf, "\n%s", prefix)
659 printb(buf, v.Field(i).Interface().(Block), prefix)
660 } else if tf.Type == blocksType {
661 vf := v.Field(i)
662 for i := 0; i < vf.Len(); i++ {
663 fmt.Fprintf(buf, "\n%s", prefix)
664 printb(buf, vf.Index(i).Interface().(Block), prefix)
665 }
666 } else if tf.Type.Kind() == reflect.Slice && tf.Type != inlinesType && tf.Type.Elem().Kind() != reflect.String {
667 fmt.Fprintf(buf, "\n%s%s:", prefix, t.Field(i).Name)
668 printslice(buf, v.Field(i), prefix)
669 }
670 }
671 fmt.Fprintf(buf, ")")
672 }
673
674 func printslice(buf *bytes.Buffer, v reflect.Value, prefix string) {
675 if v.Type().Elem().Kind() == reflect.Slice {
676 for i := 0; i < v.Len(); i++ {
677 fmt.Fprintf(buf, "\n%s#%d:", prefix, i)
678 printslice(buf, v.Index(i), prefix+"\t")
679 }
680 return
681 }
682 for i := 0; i < v.Len(); i++ {
683 fmt.Fprintf(buf, " ")
684 printb(buf, v.Index(i).Interface().(Block), prefix+"\t")
685 }
686 }
687
688 func printi(buf *bytes.Buffer, in Inline) {
689 fmt.Fprintf(buf, "%T(", in)
690 v := reflect.ValueOf(in).Elem()
691 text := v.FieldByName("Text")
692 if text.IsValid() {
693 fmt.Fprintf(buf, "%q", text)
694 }
695 inner := v.FieldByName("Inner")
696 if inner.IsValid() {
697 printis(buf, inner.Interface().([]Inline))
698 }
699 buf.WriteString(")")
700 }
701
702 func printis(buf *bytes.Buffer, ins []Inline) {
703 for _, in := range ins {
704 buf.WriteByte(' ')
705 printi(buf, in)
706 }
707 }
708
709 func dump(b Block) string {
710 var buf bytes.Buffer
711 printb(&buf, b, "")
712 return buf.String()
713 }
714
View as plain text