Source file
tour/solutions/webcrawler.go
1
2
3
4
5
6
7 package main
8
9 import (
10 "errors"
11 "fmt"
12 "sync"
13 )
14
// Fetcher is the page-retrieval dependency that Crawl is written against.
type Fetcher interface {
	// Fetch retrieves the page identified by url. It returns the page body,
	// the list of URLs reported for that page (Crawl visits each of them as
	// a child), and a non-nil err when the page could not be retrieved.
	Fetch(url string) (body string, urls []string, err error)
}
20
21
22
23
var (
	// fetched is the crawl-wide record of every URL that has been claimed.
	// The value stored for a URL is loading while its fetch is in flight and
	// is then overwritten with the fetch result (nil when it succeeded).
	// The embedded mutex must be held for every access to m made while
	// crawler goroutines may still be running.
	fetched = struct {
		sync.Mutex
		m map[string]error
	}{m: make(map[string]error)}

	// loading is the placeholder stored in fetched.m for a URL whose fetch
	// has started but not yet completed.
	loading = errors.New("url load in progress")
)
30
31
32
33 func Crawl(url string, depth int, fetcher Fetcher) {
34 if depth <= 0 {
35 fmt.Printf("<- Done with %v, depth 0.\n", url)
36 return
37 }
38
39 fetched.Lock()
40 if _, ok := fetched.m[url]; ok {
41 fetched.Unlock()
42 fmt.Printf("<- Done with %v, already fetched.\n", url)
43 return
44 }
45
46 fetched.m[url] = loading
47 fetched.Unlock()
48
49
50 body, urls, err := fetcher.Fetch(url)
51
52
53 fetched.Lock()
54 fetched.m[url] = err
55 fetched.Unlock()
56
57 if err != nil {
58 fmt.Printf("<- Error on %v: %v\n", url, err)
59 return
60 }
61 fmt.Printf("Found: %s %q\n", url, body)
62 done := make(chan bool)
63 for i, u := range urls {
64 fmt.Printf("-> Crawling child %v/%v of %v : %v.\n", i, len(urls), url, u)
65 go func(url string) {
66 Crawl(url, depth-1, fetcher)
67 done <- true
68 }(u)
69 }
70 for i, u := range urls {
71 fmt.Printf("<- [%v] %v/%v Waiting for child %v.\n", url, i, len(urls), u)
72 <-done
73 }
74 fmt.Printf("<- Done with %v\n", url)
75 }
76
77 func main() {
78 Crawl("https://golang.org/", 4, fetcher)
79
80 fmt.Println("Fetching stats\n--------------")
81 for url, err := range fetched.m {
82 if err != nil {
83 fmt.Printf("%v failed: %v\n", url, err)
84 } else {
85 fmt.Printf("%v was fetched\n", url)
86 }
87 }
88 }
89
90
// fakeFetcher is a Fetcher that serves canned results from memory; the map
// key is the page URL.
type fakeFetcher map[string]*fakeResult

// fakeResult is the canned response for one page.
type fakeResult struct {
	body string   // page body returned by Fetch
	urls []string // URLs returned by Fetch for the page
}

// Fetch returns the canned body and URLs stored for url. It returns a
// "not found" error when url has no entry or its entry is nil.
//
// The receiver is a value because a map already refers to shared underlying
// data; a pointer to it adds nothing. *fakeFetcher still has this method, so
// existing pointer users keep working.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	res, ok := f[url]
	if !ok || res == nil {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return res.body, res.urls, nil
}
104
105
106 var fetcher = &fakeFetcher{
107 "https://golang.org/": &fakeResult{
108 "The Go Programming Language",
109 []string{
110 "https://golang.org/pkg/",
111 "https://golang.org/cmd/",
112 },
113 },
114 "https://golang.org/pkg/": &fakeResult{
115 "Packages",
116 []string{
117 "https://golang.org/",
118 "https://golang.org/cmd/",
119 "https://golang.org/pkg/fmt/",
120 "https://golang.org/pkg/os/",
121 },
122 },
123 "https://golang.org/pkg/fmt/": &fakeResult{
124 "Package fmt",
125 []string{
126 "https://golang.org/",
127 "https://golang.org/pkg/",
128 },
129 },
130 "https://golang.org/pkg/os/": &fakeResult{
131 "Package os",
132 []string{
133 "https://golang.org/",
134 "https://golang.org/pkg/",
135 },
136 },
137 }
138
View as plain text