parse.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package dag implements a language for expressing directed acyclic
     6  // graphs.
     7  //
     8  // The general syntax of a rule is:
     9  //
    10  //	a, b < c, d;
    11  //
    12  // which means c and d come after a and b in the partial order
    13  // (that is, there are edges from c and d to a and b),
    14  // but doesn't provide a relative order between a vs b or c vs d.
    15  //
    16  // The rules can chain together, as in:
    17  //
    18  //	e < f, g < h;
    19  //
    20  // which is equivalent to
    21  //
    22  //	e < f, g;
    23  //	f, g < h;
    24  //
    25  // Except for the special bottom element "NONE", each name
    26  // must appear exactly once on the right-hand side of any rule.
    27  // That rule serves as the definition of the allowed successor
    28  // for that name. The definition must appear before any uses
    29  // of the name on the left-hand side of a rule. (That is, the
    30  // rules themselves must be ordered according to the partial
    31  // order, for easier reading by people.)
    32  //
    33  // Negative assertions double-check the partial order:
    34  //
    35  //	i !< j
    36  //
    37  // means that it must NOT be the case that i < j.
    38  // Negative assertions may appear anywhere in the rules,
    39  // even before i and j have been defined.
    40  //
    41  // Comments begin with #.
    42  package dag
    43  
    44  import (
    45  	"cmp"
    46  	"fmt"
    47  	"slices"
    48  	"strings"
    49  )
    50  
    51  type Graph struct {
    52  	Nodes   []string
    53  	byLabel map[string]int
    54  	edges   map[string]map[string]bool
    55  }
    56  
    57  func newGraph() *Graph {
    58  	return &Graph{byLabel: map[string]int{}, edges: map[string]map[string]bool{}}
    59  }
    60  
    61  func (g *Graph) addNode(label string) bool {
    62  	if _, ok := g.byLabel[label]; ok {
    63  		return false
    64  	}
    65  	g.byLabel[label] = len(g.Nodes)
    66  	g.Nodes = append(g.Nodes, label)
    67  	g.edges[label] = map[string]bool{}
    68  	return true
    69  }
    70  
    71  func (g *Graph) AddEdge(from, to string) {
    72  	g.edges[from][to] = true
    73  }
    74  
    75  func (g *Graph) DelEdge(from, to string) {
    76  	delete(g.edges[from], to)
    77  }
    78  
    79  func (g *Graph) HasEdge(from, to string) bool {
    80  	return g.edges[from] != nil && g.edges[from][to]
    81  }
    82  
    83  func (g *Graph) Edges(from string) []string {
    84  	edges := make([]string, 0, 16)
    85  	for k := range g.edges[from] {
    86  		edges = append(edges, k)
    87  	}
    88  	slices.SortFunc(edges, func(a, b string) int {
    89  		return cmp.Compare(g.byLabel[a], g.byLabel[b])
    90  	})
    91  	return edges
    92  }
    93  
    94  // Parse parses the DAG language and returns the transitive closure of
    95  // the described graph. In the returned graph, there is an edge from "b"
    96  // to "a" if b < a (or a > b) in the partial order.
    97  func Parse(dag string) (*Graph, error) {
    98  	g := newGraph()
    99  	disallowed := []rule{}
   100  
   101  	rules, err := parseRules(dag)
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  
   106  	// TODO: Add line numbers to errors.
   107  	var errors []string
   108  	errorf := func(format string, a ...any) {
   109  		errors = append(errors, fmt.Sprintf(format, a...))
   110  	}
   111  	for _, r := range rules {
   112  		if r.op == "!<" {
   113  			disallowed = append(disallowed, r)
   114  			continue
   115  		}
   116  		for _, def := range r.def {
   117  			if def == "NONE" {
   118  				errorf("NONE cannot be a predecessor")
   119  				continue
   120  			}
   121  			if !g.addNode(def) {
   122  				errorf("multiple definitions for %s", def)
   123  			}
   124  			for _, less := range r.less {
   125  				if less == "NONE" {
   126  					continue
   127  				}
   128  				if _, ok := g.byLabel[less]; !ok {
   129  					errorf("use of %s before its definition", less)
   130  				} else {
   131  					g.AddEdge(def, less)
   132  				}
   133  			}
   134  		}
   135  	}
   136  
   137  	// Check for missing definition.
   138  	for _, tos := range g.edges {
   139  		for to := range tos {
   140  			if g.edges[to] == nil {
   141  				errorf("missing definition for %s", to)
   142  			}
   143  		}
   144  	}
   145  
   146  	// Complete transitive closure.
   147  	for _, k := range g.Nodes {
   148  		for _, i := range g.Nodes {
   149  			for _, j := range g.Nodes {
   150  				if i != k && k != j && g.HasEdge(i, k) && g.HasEdge(k, j) {
   151  					if i == j {
   152  						// Can only happen along with a "use of X before deps" error above,
   153  						// but this error is more specific - it makes clear that reordering the
   154  						// rules will not be enough to fix the problem.
   155  						errorf("graph cycle: %s < %s < %s", j, k, i)
   156  					}
   157  					g.AddEdge(i, j)
   158  				}
   159  			}
   160  		}
   161  	}
   162  
   163  	// Check negative assertions against completed allowed graph.
   164  	for _, bad := range disallowed {
   165  		for _, less := range bad.less {
   166  			for _, def := range bad.def {
   167  				if g.HasEdge(def, less) {
   168  					errorf("graph edge assertion failed: %s !< %s", less, def)
   169  				}
   170  			}
   171  		}
   172  	}
   173  
   174  	if len(errors) > 0 {
   175  		return nil, fmt.Errorf("%s", strings.Join(errors, "\n"))
   176  	}
   177  
   178  	return g, nil
   179  }
   180  
   181  // A rule is a line in the DAG language where "less < def" or "less !< def".
   182  type rule struct {
   183  	less []string
   184  	op   string // Either "<" or "!<"
   185  	def  []string
   186  }
   187  
   188  type syntaxError string
   189  
   190  func (e syntaxError) Error() string {
   191  	return string(e)
   192  }
   193  
   194  // parseRules parses the rules of a DAG.
   195  func parseRules(rules string) (out []rule, err error) {
   196  	defer func() {
   197  		e := recover()
   198  		switch e := e.(type) {
   199  		case nil:
   200  			return
   201  		case syntaxError:
   202  			err = e
   203  		default:
   204  			panic(e)
   205  		}
   206  	}()
   207  	p := &rulesParser{lineno: 1, text: rules}
   208  
   209  	var prev []string
   210  	var op string
   211  	for {
   212  		list, tok := p.nextList()
   213  		if tok == "" {
   214  			if prev == nil {
   215  				break
   216  			}
   217  			p.syntaxError("unexpected EOF")
   218  		}
   219  		if prev != nil {
   220  			out = append(out, rule{prev, op, list})
   221  		}
   222  		prev = list
   223  		if tok == ";" {
   224  			prev = nil
   225  			op = ""
   226  			continue
   227  		}
   228  		if tok != "<" && tok != "!<" {
   229  			p.syntaxError("missing <")
   230  		}
   231  		op = tok
   232  	}
   233  
   234  	return out, err
   235  }
   236  
   237  // A rulesParser parses the depsRules syntax described above.
   238  type rulesParser struct {
   239  	lineno   int
   240  	lastWord string
   241  	text     string
   242  }
   243  
   244  // syntaxError reports a parsing error.
   245  func (p *rulesParser) syntaxError(msg string) {
   246  	panic(syntaxError(fmt.Sprintf("parsing graph: line %d: syntax error: %s near %s", p.lineno, msg, p.lastWord)))
   247  }
   248  
   249  // nextList parses and returns a comma-separated list of names.
   250  func (p *rulesParser) nextList() (list []string, token string) {
   251  	for {
   252  		tok := p.nextToken()
   253  		switch tok {
   254  		case "":
   255  			if len(list) == 0 {
   256  				return nil, ""
   257  			}
   258  			fallthrough
   259  		case ",", "<", "!<", ";":
   260  			p.syntaxError("bad list syntax")
   261  		}
   262  		list = append(list, tok)
   263  
   264  		tok = p.nextToken()
   265  		if tok != "," {
   266  			return list, tok
   267  		}
   268  	}
   269  }
   270  
   271  // nextToken returns the next token in the deps rules,
   272  // one of ";" "," "<" "!<" or a name.
   273  func (p *rulesParser) nextToken() string {
   274  	for {
   275  		if p.text == "" {
   276  			return ""
   277  		}
   278  		switch p.text[0] {
   279  		case ';', ',', '<':
   280  			t := p.text[:1]
   281  			p.text = p.text[1:]
   282  			return t
   283  
   284  		case '!':
   285  			if len(p.text) < 2 || p.text[1] != '<' {
   286  				p.syntaxError("unexpected token !")
   287  			}
   288  			p.text = p.text[2:]
   289  			return "!<"
   290  
   291  		case '#':
   292  			i := strings.Index(p.text, "\n")
   293  			if i < 0 {
   294  				i = len(p.text)
   295  			}
   296  			p.text = p.text[i:]
   297  			continue
   298  
   299  		case '\n':
   300  			p.lineno++
   301  			fallthrough
   302  		case ' ', '\t':
   303  			p.text = p.text[1:]
   304  			continue
   305  
   306  		default:
   307  			i := strings.IndexAny(p.text, "!;,<#\n \t")
   308  			if i < 0 {
   309  				i = len(p.text)
   310  			}
   311  			t := p.text[:i]
   312  			p.text = p.text[i:]
   313  			p.lastWord = t
   314  			return t
   315  		}
   316  	}
   317  }
   318
View as plain text