Longest Duplicate Substring

hard binary search rolling hash string

Problem

Given a string s, find a duplicated substring — a contiguous substring that occurs (possibly overlapping) two or more times in s — of the longest possible length. If no duplicated substring exists, return the empty string. If several have the maximum length, any one of them is acceptable.

Inputs = "banana"
Output"ana"
"ana" appears at index 1 and index 3 (overlapping). No length-4 substring repeats, so 3 is the maximum.

def longest_dup_substring(s):
    MOD, BASE = (1 << 61) - 1, 131
    n = len(s)

    def has_dup(L):
        if L == 0:
            return 0
        h, p = 0, pow(BASE, L, MOD)
        for i in range(L):
            h = (h * BASE + ord(s[i])) % MOD
        seen = {h: [0]}
        for i in range(L, n):
            h = (h * BASE + ord(s[i]) - ord(s[i - L]) * p) % MOD
            start = i - L + 1
            if h in seen:
                for j in seen[h]:
                    if s[j:j + L] == s[start:start + L]:
                        return start
                seen[h].append(start)
            else:
                seen[h] = [start]
        return -1

    lo, hi, best = 1, n - 1, 0
    while lo <= hi:
        mid = (lo + hi) // 2
        pos = has_dup(mid)
        if pos != -1:
            best, lo = pos, mid + 1
        else:
            hi = mid - 1
    L = hi
    return s[best:best + L]
function longestDupSubstring(s) {
  const MOD = 1125899906842597n, BASE = 131n;
  const n = s.length;

  function hasDup(L) {
    if (L === 0) return 0;
    let h = 0n, p = 1n;
    for (let k = 0; k < L; k++) p = (p * BASE) % MOD;
    for (let i = 0; i < L; i++) h = (h * BASE + BigInt(s.charCodeAt(i))) % MOD;
    const seen = new Map([[h, [0]]]);
    for (let i = L; i < n; i++) {
      h = (h * BASE + BigInt(s.charCodeAt(i)) - BigInt(s.charCodeAt(i - L)) * p) % MOD;
      h = ((h % MOD) + MOD) % MOD;
      const start = i - L + 1;
      if (seen.has(h)) {
        for (const j of seen.get(h))
          if (s.substr(j, L) === s.substr(start, L)) return start;
        seen.get(h).push(start);
      } else seen.set(h, [start]);
    }
    return -1;
  }

  let lo = 1, hi = n - 1, best = 0;
  while (lo <= hi) {
    const mid = (lo + hi) >> 1;
    const pos = hasDup(mid);
    if (pos !== -1) { best = pos; lo = mid + 1; }
    else hi = mid - 1;
  }
  return s.substr(best, hi);
}
class Solution {
    long MOD = 1125899906842597L, BASE = 131L;
    String s; int n;

    int hasDup(int L) {
        if (L == 0) return 0;
        long h = 0, p = 1;
        for (int k = 0; k < L; k++) p = (p * BASE) % MOD;
        for (int i = 0; i < L; i++) h = (h * BASE + s.charAt(i)) % MOD;
        Map<Long, List<Integer>> seen = new HashMap<>();
        seen.put(h, new ArrayList<>(List.of(0)));
        for (int i = L; i < n; i++) {
            h = (h * BASE + s.charAt(i) - s.charAt(i - L) * p) % MOD;
            h = ((h % MOD) + MOD) % MOD;
            int start = i - L + 1;
            if (seen.containsKey(h)) {
                for (int j : seen.get(h))
                    if (s.substring(j, j + L).equals(s.substring(start, start + L))) return start;
                seen.get(h).add(start);
            } else seen.put(h, new ArrayList<>(List.of(start)));
        }
        return -1;
    }

    public String longestDupSubstring(String str) {
        s = str; n = s.length();
        int lo = 1, hi = n - 1, best = 0;
        while (lo <= hi) {
            int mid = (lo + hi) / 2, pos = hasDup(mid);
            if (pos != -1) { best = pos; lo = mid + 1; }
            else hi = mid - 1;
        }
        return s.substring(best, best + hi);
    }
}
class Solution {
    string s; int n;
    const long long MOD = 1125899906842597LL, BASE = 131;

    int hasDup(int L) {
        if (L == 0) return 0;
        long long h = 0, p = 1;
        for (int k = 0; k < L; k++) p = (__int128)p * BASE % MOD;
        for (int i = 0; i < L; i++) h = ((__int128)h * BASE + s[i]) % MOD;
        unordered_map<long long, vector<int>> seen;
        seen[h] = {0};
        for (int i = L; i < n; i++) {
            h = ((__int128)h * BASE + s[i] - (__int128)s[i - L] * p) % MOD;
            h = (h % MOD + MOD) % MOD;
            int start = i - L + 1;
            if (seen.count(h)) {
                for (int j : seen[h])
                    if (s.compare(j, L, s, start, L) == 0) return start;
                seen[h].push_back(start);
            } else seen[h] = {start};
        }
        return -1;
    }
public:
    string longestDupSubstring(string str) {
        s = str; n = s.size();
        int lo = 1, hi = n - 1, best = 0;
        while (lo <= hi) {
            int mid = (lo + hi) / 2, pos = hasDup(mid);
            if (pos != -1) { best = pos; lo = mid + 1; }
            else hi = mid - 1;
        }
        return s.substr(best, hi);
    }
};
Time: O(n log n) Space: O(n)