Merge Email Lists

题目地址:
https://gist.github.com/gcrfelix/a6849f256372b7c40a2265ed88d4a8b8

题目:
合并邮件列表(后来才知道也是个面经题)
    Given 1 million email list:
    list 1: a@a.com, b@b.com
    list 2: b@b.com, c@c.com
    list 3: e@e.com
    list 4: a@a.com
...
    Combine lists with identical emails, and output tuples:
            (list 1, list 2, list 4) (a@a.com, b@b.com, c@c.com)
            (list 3) (e@e.com)

思路分析:
这道题首先要把每个邮件出现在哪个list存下来,然后建立一个parent array用来union(大的指向小的)。之后根据parent的结果和input来将指向同一个root的email放进hash(是一个integer和set的map)。

代码:

public List<Set<String>> mergeEmails(String[][] emails) {
    List<Set<String>> result = new ArrayList<Set<String>>();
    if(emails == null || emails.length == 0 || emails[0].length == 0) {
        return result;
    }
    int m = emails.length;
    int[] parent = new int[m]; // initialize parent array, at the begining, each list is in its own group    for(int i=0; i<m; i++) {
        parent[i] = i;
    }
    // key: email, value: List of set numbers that email belongs to    Map<String, List<Integer>> map = new HashMap<String, List<Integer>>();
    for(int i=0; i<m; i++) {
        for(String email : emails[i]) {
            if(!map.containsKey(email)) {
                List<Integer> list = new ArrayList<Integer>();
                list.add(i);
                map.put(email, list);
            } else {
                map.get(email).add(i);
            }
        }
    }
    // union    for(List<Integer> list : map.values()) {
        // list is already in order        for(int i=0; i<list.size(); i++) {
            // make larger parent node point to small parent node            union(parent, list.get(0), list.get(i));
        }
    }
    HashMap<Integer, Set<String>> hash = new HashMap<Integer, Set<String>>();
    //initialization    for(int i=0; i<m; i++) {
        hash.put(i, new HashSet<String>());
    }
    for(int i=0; i<m; i++) {
        if(i != parent[i]) {
            int p = parent[i];
            // we have to add twice here            if(hash.get(p).isEmpty()) {
                hash.get(p).addAll(Arrays.asList(emails[p]));
            }
            hash.get(p).addAll(Arrays.asList(emails[i]));
        } else {   // 如果parent的值指向自身,说明是单独的集合,直接加入            hash.get(i).addAll(Arrays.asList(emails[i]));
        }
    }
    for(Set<String> set : hash.values()) {
        if(!set.isEmpty()) {
            result.add(set);
        }
    }
    return result;
}
public void union(int[] parent, int x, int y) {
    int parentX = getParent(parent, x);
    int parentY = getParent(parent, y);
    if(parentX < parentY) {
        parent[parentY] = parentX;
    } else if(parentX > parentY) {
        parent[parentX] = parentY;
    }
}
public int getParent(int[] parent, int x) {
    while(x != parent[x]) {
        parent[x] = parent[parent[x]];
        x = parent[x];
    }
    return x;
}
public static void main(String[] args) {
    String[] e0 = {"a@a.com", "b@b.com"};
    String[] e1 = {"b@b.com", "c@c.com"};
    String[] e2 = {"e@e.com"};
    String[] e3 = {"a@a.com"};
    String[][] emails = {e0, e1, e2, e3};
    MergeEmailLists test = new MergeEmailLists();
    List<Set<String>> result = test.mergeEmails(emails);
    for(Set<String> set : result) {
        System.out.println(set);
    }
}







Comments

Popular Posts