Word Frequency

medium string bash sorting

Problem

Given a text file words.txt, output the words it contains together with their frequencies, sorted by frequency from greatest to least. Words are separated by spaces. Each word may have multiple spaces around it. The canonical bash one-liner is cat words.txt | tr -s ' ' '\n' | sort | uniq -c | sort -rn | awk '{print $2, $1}'.

Input"the day is sunny the the\nthe sunny is is"
Outputthe 4\nis 3\nsunny 2\nday 1
Counts are computed per token; output sorted by count desc, breaking ties lexicographically.

# Bash one-liner:
# cat words.txt | tr -s ' ' '\n' | sort | uniq -c | sort -rn | awk '{print $2, $1}'
from collections import Counter

def word_frequency(lines):
    counts = Counter()
    for line in lines:
        for tok in line.split():
            counts[tok] += 1
    items = sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))
    return [f"{w} {c}" for w, c in items]
// Bash one-liner:
// cat words.txt | tr -s ' ' '\n' | sort | uniq -c | sort -rn | awk '{print $2, $1}'
function wordFrequency(lines) {
  const counts = new Map();
  for (const line of lines) {
    for (const tok of line.split(/\s+/).filter(Boolean)) {
      counts.set(tok, (counts.get(tok) || 0) + 1);
    }
  }
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
    .map(([w, c]) => `${w} ${c}`);
}
// Bash one-liner:
// cat words.txt | tr -s ' ' '\n' | sort | uniq -c | sort -rn | awk '{print $2, $1}'
class Solution {
    public List<String> wordFrequency(List<String> lines) {
        Map<String, Integer> counts = new HashMap<>();
        for (String line : lines)
            for (String tok : line.trim().split("\\s+"))
                if (!tok.isEmpty())
                    counts.merge(tok, 1, Integer::sum);
        return counts.entrySet().stream()
            .sorted((a, b) -> b.getValue().equals(a.getValue())
                ? a.getKey().compareTo(b.getKey())
                : b.getValue() - a.getValue())
            .map(e -> e.getKey() + " " + e.getValue())
            .collect(Collectors.toList());
    }
}
// Bash one-liner:
// cat words.txt | tr -s ' ' '\n' | sort | uniq -c | sort -rn | awk '{print $2, $1}'
vector<string> wordFrequency(vector<string>& lines) {
    unordered_map<string, int> counts;
    for (auto& line : lines) {
        stringstream ss(line);
        string tok;
        while (ss >> tok) counts[tok]++;
    }
    vector<pair<string, int>> v(counts.begin(), counts.end());
    sort(v.begin(), v.end(), [](auto& a, auto& b) {
        return a.second != b.second ? a.second > b.second : a.first < b.first;
    });
    vector<string> out;
    for (auto& p : v) out.push_back(p.first + " " + to_string(p.second));
    return out;
}
Time: O(W + K log K) Space: O(K)