Sentence Similarity

easy hash set string

Problem

Given two sentences as word arrays and a list of similar word pairs (which are not transitive), determine whether the two sentences are similar word-by-word. Words always match themselves.

Input

s1=["great","acting","skills"], s2=["fine","drama","talent"], pairs=[["great","fine"],["drama","acting"],["skills","talent"]]

OutputTrue

Each corresponding word pair is either equal or appears in the similar set.

input

def areSentencesSimilar(sentence1, sentence2, similarPairs):
    if len(sentence1) != len(sentence2): return False
    pairs = set()
    for a, b in similarPairs:
        pairs.add((a, b)); pairs.add((b, a))
    for a, b in zip(sentence1, sentence2):
        if a != b and (a, b) not in pairs: return False
    return True

function areSentencesSimilar(s1, s2, pairs) {
  if (s1.length !== s2.length) return false;
  const set = new Set();
  for (const [a, b] of pairs) { set.add(a + '|' + b); set.add(b + '|' + a); }
  for (let i = 0; i < s1.length; i++) {
    if (s1[i] !== s2[i] && !set.has(s1[i] + '|' + s2[i])) return false;
  }
  return true;
}

class Solution {
  public boolean areSentencesSimilar(String[] s1, String[] s2, List<List<String>> pairs) {
    if (s1.length != s2.length) return false;
    Set<String> set = new HashSet<>();
    for (List<String> p : pairs) { set.add(p.get(0) + "|" + p.get(1)); set.add(p.get(1) + "|" + p.get(0)); }
    for (int i = 0; i < s1.length; i++)
      if (!s1[i].equals(s2[i]) && !set.contains(s1[i] + "|" + s2[i])) return false;
    return true;
  }
}

class Solution {
public:
  bool areSentencesSimilar(vector<string>& s1, vector<string>& s2, vector<vector<string>>& pairs) {
    if (s1.size() != s2.size()) return false;
    unordered_set<string> set;
    for (auto& p : pairs) { set.insert(p[0] + "|" + p[1]); set.insert(p[1] + "|" + p[0]); }
    for (int i = 0; i < (int)s1.size(); i++)
      if (s1[i] != s2[i] && !set.count(s1[i] + "|" + s2[i])) return false;
    return true;
  }
};

Explanation

Two sentences are "similar" only if they match word by word, where each pair of corresponding words is either identical or explicitly listed as a similar pair. The fast way to check membership is a hash set of allowed pairs.

First, if the sentences have different lengths, they can never line up word by word, so we return False right away.

Next we load the similar pairs into a set. Since similarity goes both ways (a~b means b~a), we add each pair in both directions, e.g. (a,b) and (b,a).

Then we walk the two sentences together with zip. For each position, the words are fine if they are equal or if the pair is in our set. If any position fails both tests, we return False; if all pass, we return True.

Example: ["great","acting","skills"] vs ["fine","drama","talent"] with pairs great~fine, drama~acting, skills~talent. Each column is a listed pair, so the result is True.

Time: O(n + p) Space: O(p)