Compare Strings by Frequency of the Smallest Character
Problem
Define f(s) as the count of the lexicographically smallest character in s. For each query, return the number of words w such that f(query) < f(w).
queries = ["cbd"], words = ["zaaaz"][1]from bisect import bisect_right
def f(s):
m = min(s)
return s.count(m)
def num_smaller_by_frequency(queries, words):
wf = sorted(f(w) for w in words)
out = []
for q in queries:
fq = f(q)
out.append(len(wf) - bisect_right(wf, fq))
return out
function f(s) {
let m = s[0];
for (const c of s) if (c < m) m = c;
let cnt = 0;
for (const c of s) if (c === m) cnt++;
return cnt;
}
function numSmallerByFrequency(queries, words) {
const wf = words.map(f).sort((a, b) => a - b);
return queries.map(q => {
const fq = f(q);
let lo = 0, hi = wf.length;
while (lo < hi) {
const mid = (lo + hi) >> 1;
if (wf[mid] <= fq) lo = mid + 1; else hi = mid;
}
return wf.length - lo;
});
}
class Solution {
int f(String s) {
char m = 'z' + 1 == 0 ? 0 : 'z';
for (char c : s.toCharArray()) if (c < m) m = c;
int cnt = 0;
for (char c : s.toCharArray()) if (c == m) cnt++;
return cnt;
}
public int[] numSmallerByFrequency(String[] queries, String[] words) {
int[] wf = new int[words.length];
for (int i = 0; i < words.length; i++) wf[i] = f(words[i]);
Arrays.sort(wf);
int[] out = new int[queries.length];
for (int i = 0; i < queries.length; i++) {
int fq = f(queries[i]);
int lo = 0, hi = wf.length;
while (lo < hi) {
int mid = (lo + hi) >>> 1;
if (wf[mid] <= fq) lo = mid + 1; else hi = mid;
}
out[i] = wf.length - lo;
}
return out;
}
}
int f(const string& s) {
char m = *min_element(s.begin(), s.end());
return count(s.begin(), s.end(), m);
}
vector<int> numSmallerByFrequency(vector<string>& queries, vector<string>& words) {
vector<int> wf;
for (auto& w : words) wf.push_back(f(w));
sort(wf.begin(), wf.end());
vector<int> out;
for (auto& q : queries) {
int fq = f(q);
out.push_back(wf.end() - upper_bound(wf.begin(), wf.end(), fq));
}
return out;
}
Explanation
Every string boils down to a single number: f(s), the count of its smallest letter. For example f("zaaaz") is 3 because the smallest letter is 'a' and it appears three times. The whole problem turns into "how many word-numbers are bigger than each query-number".
The slow way would compare each query against every word. The faster idea is to compute f for all words once, then sort those values into wf. Sorting lets us answer each query with a quick binary search instead of a full scan.
For a query value fq we want the count of words with f(w) > fq. Using bisect_right(wf, fq) we find how many sorted values are ≤ fq; everything after that index is strictly greater. So the answer is len(wf) - bisect_right(wf, fq).
bisect_right (an upper bound) is important: it lands just past any values equal to fq, so equal frequencies are correctly excluded from the "strictly greater" count.
Example: queries=["cbd"], words=["zaaaz"]. f("cbd")=1 and the sorted word values are [3]. There is one value above 1, so the answer is [1].