Most Common Word

easy hash-map string counting

Problem

Given a paragraph and a list of banned words, return the most frequent word that is not in the banned list. The answer is guaranteed to be unique and words are case-insensitive.

Inputparagraph = "Bob hit a ball, the hit BALL flew far after it was hit." banned = ["hit"]
Output"ball"
"hit" occurs 3 times but is banned; "ball" occurs 2 times (case-insensitive) and is not banned.

import re
from collections import Counter

def mostCommonWord(paragraph, banned):
    words = re.findall(r"[a-z]+", paragraph.lower())
    banned_set = set(banned)
    counts = Counter(w for w in words if w not in banned_set)
    return counts.most_common(1)[0][0]
function mostCommonWord(paragraph, banned) {
  const words = paragraph.toLowerCase().match(/[a-z]+/g) || [];
  const ban = new Set(banned);
  const counts = new Map();
  for (const w of words) {
    if (!ban.has(w)) counts.set(w, (counts.get(w) || 0) + 1);
  }
  let best = "", bestCount = 0;
  for (const [w, c] of counts) if (c > bestCount) { best = w; bestCount = c; }
  return best;
}
import java.util.*;
class Solution {
  public String mostCommonWord(String paragraph, String[] banned) {
    Set<String> ban = new HashSet<>(Arrays.asList(banned));
    Map<String,Integer> counts = new HashMap<>();
    String best = ""; int bestCount = 0;
    for (String w : paragraph.toLowerCase().split("[^a-z]+")) {
      if (w.isEmpty() || ban.contains(w)) continue;
      int c = counts.getOrDefault(w, 0) + 1;
      counts.put(w, c);
      if (c > bestCount) { best = w; bestCount = c; }
    }
    return best;
  }
}
#include <bits/stdc++.h>
using namespace std;
class Solution {
public:
    string mostCommonWord(string paragraph, vector<string>& banned) {
        unordered_set<string> ban(banned.begin(), banned.end());
        unordered_map<string,int> counts;
        string best, cur;
        int bestCount = 0;
        for (char &c : paragraph) c = isalpha(c) ? tolower(c) : ' ';
        stringstream ss(paragraph);
        while (ss >> cur) {
            if (ban.count(cur)) continue;
            if (++counts[cur] > bestCount) { bestCount = counts[cur]; best = cur; }
        }
        return best;
    }
};
Time: O(P + B) Space: O(P + B)