forked from luchukun/mit-6.824-Distributed-System
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathii.go
68 lines (62 loc) · 2.05 KB
/
ii.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
package main
import "os"
import "fmt"
import (
"mapreduce"
"strings"
"unicode"
"strconv"
"sort"
)
// mapF is the map phase of the inverted-index job. It is called once per
// input piece; in this framework document is the name of the file being
// processed and value is that file's contents.
//
// value is split into words, where a word is a maximal run of Unicode
// letters, and one mapreduce.KeyValue pairing the word with document is
// emitted for each distinct word, in order of first appearance. Words are
// compared exactly, so differently-cased spellings are distinct keys. When
// value contains no letters the returned slice is nil.
func mapF(document string, value string) (res []mapreduce.KeyValue) {
	// Every non-letter rune acts as a separator between words.
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r)
	}
	words := strings.FieldsFunc(value, isSeparator)

	// seen is the set of words already emitted for this document, so that
	// a word repeated in the text still yields a single pair.
	seen := make(map[string]struct{})
	for _, word := range words {
		if _, dup := seen[word]; dup {
			continue
		}
		seen[word] = struct{}{}
		res = append(res, mapreduce.KeyValue{word, document})
	}
	return res
}
// reduceF is the reduce phase of the inverted-index job. It is called once
// per key produced by mapF, with the list of that key's values merged across
// all inputs (mapF emits the document name as the value).
//
// The result has the form "<count> <v1>,<v2>,...": the number of values, a
// single space, then the values in ascending lexical order joined by commas.
// An empty or nil values list yields "0 " instead of panicking.
//
// key does not appear in the result. values is sorted in place, so the
// caller's slice is reordered as a side effect.
func reduceF(key string, values []string) string {
	// Sort so the output does not depend on the order the values arrive in.
	sort.Strings(values)

	// strings.Join builds the list in a single pass and returns "" for an
	// empty slice, so no special-casing of the first element is required.
	return strconv.Itoa(len(values)) + " " + strings.Join(values, ",")
}
// main dispatches on the command-line arguments. It can be run in 3 ways:
// 1) Sequential (e.g., go run ii.go master sequential x1.txt .. xN.txt)
// 2) Master (e.g., go run ii.go master localhost:7777 x1.txt .. xN.txt)
// 3) Worker (e.g., go run ii.go worker localhost:7777 localhost:7778 &)
//
// With fewer than three arguments after the program name it only prints a
// pointer to these usage comments.
func main() {
	args := os.Args
	switch {
	case len(args) < 4:
		fmt.Printf("%s: see usage comments in file\n", args[0])
	case args[1] == "master":
		// args[2] is either the literal "sequential" or the value handed to
		// mapreduce.Distributed (an address in the examples above); the
		// remaining arguments are the input files.
		var mr *mapreduce.Master
		if mode := args[2]; mode == "sequential" {
			mr = mapreduce.Sequential("iiseq", args[3:], 3, mapF, reduceF)
		} else {
			mr = mapreduce.Distributed("iiseq", args[3:], 3, mode)
		}
		mr.Wait()
	default:
		// Any first argument other than "master" starts a worker.
		mapreduce.RunWorker(args[2], args[3], mapF, reduceF, 100)
	}
}