Find Duplicate File in System

medium hash map string

Problem

Each input is a directory line: 'dir f1.txt(content1) f2.txt(content2) …'. Group all file paths sharing the same content.

Input['root/a 1.txt(abc) 2.txt(def)','root/c 3.txt(abc) 4.txt(def)']
Output[['root/a/1.txt','root/c/3.txt'],['root/a/2.txt','root/c/4.txt']]
Group full paths by content.

def find_duplicate(paths):
    from collections import defaultdict
    g = defaultdict(list)
    for line in paths:
        parts = line.split(); d = parts[0]
        for f in parts[1:]:
            i = f.index('('); name, content = f[:i], f[i+1:-1]
            g[content].append(d + '/' + name)
    return [v for v in g.values() if len(v) > 1]
function findDuplicate(paths) {
  const g = new Map();
  for (const line of paths) {
    const [d, ...files] = line.split(' ');
    for (const f of files) {
      const i = f.indexOf('('), name = f.slice(0, i), content = f.slice(i+1, -1);
      (g.get(content) ?? g.set(content, []).get(content)).push(d + '/' + name);
    }
  }
  return [...g.values()].filter(v => v.length > 1);
}
List> findDuplicate(String[] paths) {
    Map> g = new HashMap<>();
    for (String line : paths) {
        String[] parts = line.split(" "); String d = parts[0];
        for (int i = 1; i < parts.length; i++) {
            int p = parts[i].indexOf('(');
            String name = parts[i].substring(0, p), content = parts[i].substring(p+1, parts[i].length()-1);
            g.computeIfAbsent(content, k -> new ArrayList<>()).add(d + "/" + name);
        }
    }
    List> out = new ArrayList<>();
    for (var v : g.values()) if (v.size() > 1) out.add(v);
    return out;
}
vector> findDuplicate(vector& paths) {
    unordered_map> g;
    for (auto& line : paths) {
        stringstream ss(line); string d, f; ss >> d;
        while (ss >> f) {
            int p = f.find('(');
            string name = f.substr(0, p), content = f.substr(p+1, f.size()-p-2);
            g[content].push_back(d + "/" + name);
        }
    }
    vector> out;
    for (auto& [k, v] : g) if (v.size() > 1) out.push_back(v);
    return out;
}
Time: O(N·L) Space: O(N·L)