Index Pairs of a String
Problem
Given a string text and a list of strings words, return all index pairs [i, j] such that the substring text[i..j] (inclusive) is exactly one of the words. Return the pairs sorted by the first index, then by the second index.
text = "thestoryofleetcodeandme", words = ["story", "fleet", "leetcode"][[3, 7], [9, 13], [10, 17]]def index_pairs(text, words):
trie = {}
for w in words:
node = trie
for ch in w:
node = node.setdefault(ch, {})
node["$"] = True
res = []
for i in range(len(text)):
node = trie
for j in range(i, len(text)):
ch = text[j]
if ch not in node:
break
node = node[ch]
if node.get("$"):
res.append([i, j])
return res
function indexPairs(text, words) {
const trie = {};
for (const w of words) {
let node = trie;
for (const ch of w) node = node[ch] || (node[ch] = {});
node["$"] = true;
}
const res = [];
for (let i = 0; i < text.length; i++) {
let node = trie;
for (let j = i; j < text.length; j++) {
const ch = text[j];
if (!(ch in node)) break;
node = node[ch];
if (node["$"]) res.push([i, j]);
}
}
return res;
}
class Solution {
static class Node { Map<Character, Node> next = new HashMap<>(); boolean end; }
public int[][] indexPairs(String text, String[] words) {
Node trie = new Node();
for (String w : words) {
Node node = trie;
for (char ch : w.toCharArray())
node = node.next.computeIfAbsent(ch, k -> new Node());
node.end = true;
}
List<int[]> res = new ArrayList<>();
for (int i = 0; i < text.length(); i++) {
Node node = trie;
for (int j = i; j < text.length(); j++) {
Node nxt = node.next.get(text.charAt(j));
if (nxt == null) break;
node = nxt;
if (node.end) res.add(new int[]{ i, j });
}
}
return res.toArray(new int[0][]);
}
}
struct Node { unordered_map<char, Node*> next; bool end = false; };
vector<vector<int>> indexPairs(string text, vector<string>& words) {
Node* trie = new Node();
for (auto& w : words) {
Node* node = trie;
for (char ch : w) {
if (!node->next.count(ch)) node->next[ch] = new Node();
node = node->next[ch];
}
node->end = true;
}
vector<vector<int>> res;
for (int i = 0; i < (int)text.size(); i++) {
Node* node = trie;
for (int j = i; j < (int)text.size(); j++) {
if (!node->next.count(text[j])) break;
node = node->next[text[j]];
if (node->end) res.push_back({ i, j });
}
}
return res;
}
Explanation
We want every spot in text where some dictionary word appears, as index pairs [i, j]. The idea is to put all the words into a trie, then try to match words starting at each position of the text.
Building the trie is the usual letter-by-letter insert, marking the last node of each word with an end flag ("$"). This lets a single walk recognize multiple words that share prefixes at once.
Then for each start index i, we reset to the trie root and extend forward through j = i, i+1, …. As soon as a character has no matching edge, this start cannot produce any longer match, so we break. Whenever the node we reach has the end flag, the substring text[i..j] is a word and we record [i, j].
Example: text = "ababa", words ["aba", "ab"]. Starting at i = 0: a→b hits the end of ab (record [0,1]), then a finishes aba (record [0,2]). The scan repeats from each later start index.
Building costs the total length of the words; scanning is at most n starts times m the longest match, giving the stated bound.