HTML Entity Parser
Problem
Replace HTML entities in the input string by their characters: &quot; → ", &apos; → ', &amp; → &, &gt; → >, &lt; → <, &frasl; → /.
# Example input:   text = "&amp; is an HTML entity but &ambassador; is not."
# Expected output: "& is an HTML entity but &ambassador; is not."
def entity_parser(text):
    """Replace the six supported HTML entities in ``text`` with their characters.

    Supported entities: ``&quot;`` -> ``"``, ``&apos;`` -> ``'``, ``&amp;`` -> ``&``,
    ``&gt;`` -> ``>``, ``&lt;`` -> ``<``, ``&frasl;`` -> ``/``.

    The text is scanned once, left to right. Output is never re-scanned, so
    ``"&amp;gt;"`` becomes ``"&gt;"`` rather than ``">"``. An ``&`` that does
    not start a supported entity is copied through unchanged.

    Args:
        text: The input string.

    Returns:
        A new string with every supported entity replaced.
    """
    table = {
        "&quot;": '"',
        "&apos;": "'",
        "&amp;": "&",
        "&gt;": ">",
        "&lt;": "<",
        "&frasl;": "/",
    }
    out, i, n = [], 0, len(text)
    while i < n:
        if text[i] == "&":
            for ent, ch in table.items():
                if text.startswith(ent, i):
                    out.append(ch)
                    i += len(ent)  # jump past the whole entity
                    break
            else:
                # No entity begins here: the '&' is a literal character.
                out.append("&")
                i += 1
        else:
            out.append(text[i])
            i += 1
    return "".join(out)
/**
 * Replace the six supported HTML entities in `text` with their characters:
 * `&quot;` -> `"`, `&apos;` -> `'`, `&amp;` -> `&`, `&gt;` -> `>`,
 * `&lt;` -> `<`, `&frasl;` -> `/`.
 *
 * The text is scanned once, left to right; output is never re-scanned, so
 * "&amp;gt;" becomes "&gt;" rather than ">". An `&` that does not start a
 * supported entity is copied through unchanged.
 *
 * @param {string} text - The input string.
 * @returns {string} The text with every supported entity replaced.
 */
function entityParser(text) {
  const table = {
    "&quot;": '"',
    "&apos;": "'",
    "&amp;": "&",
    "&gt;": ">",
    "&lt;": "<",
    "&frasl;": "/",
  };
  let out = "";
  let i = 0;
  while (i < text.length) {
    if (text[i] === "&") {
      let matched = false;
      for (const ent in table) {
        if (text.startsWith(ent, i)) {
          out += table[ent];
          i += ent.length; // jump past the whole entity
          matched = true;
          break;
        }
      }
      if (!matched) {
        // No entity begins here: the '&' is a literal character.
        out += "&";
        i++;
      }
    } else {
      out += text[i++];
    }
  }
  return out;
}
/**
 * Replaces a fixed set of HTML entities with the characters they stand for.
 */
class Solution {
    /**
     * Entity name to replacement text. Built once because the contents never
     * change; no entity name is a prefix of another, so lookup order does not
     * affect the result.
     */
    private static final Map<String, String> ENTITIES = new LinkedHashMap<>();

    static {
        ENTITIES.put("&quot;", "\"");
        ENTITIES.put("&apos;", "'");
        ENTITIES.put("&amp;", "&");
        ENTITIES.put("&gt;", ">");
        ENTITIES.put("&lt;", "<");
        ENTITIES.put("&frasl;", "/");
    }

    /**
     * Replaces every supported entity ({@code &quot;}, {@code &apos;},
     * {@code &amp;}, {@code &gt;}, {@code &lt;}, {@code &frasl;}) in
     * {@code text} with its character.
     *
     * <p>The text is scanned once, left to right; output is never re-scanned,
     * so {@code "&amp;gt;"} becomes {@code "&gt;"} rather than {@code ">"}.
     * An ampersand that does not start a supported entity is copied unchanged.
     *
     * @param text the input string
     * @return the text with every supported entity replaced
     */
    public String entityParser(String text) {
        StringBuilder sb = new StringBuilder();
        int i = 0;
        while (i < text.length()) {
            if (text.charAt(i) == '&') {
                boolean matched = false;
                for (var e : ENTITIES.entrySet()) {
                    if (text.startsWith(e.getKey(), i)) {
                        sb.append(e.getValue());
                        i += e.getKey().length(); // jump past the whole entity
                        matched = true;
                        break;
                    }
                }
                if (!matched) {
                    // No entity begins here: the '&' is a literal character.
                    sb.append('&');
                    i++;
                }
            } else {
                sb.append(text.charAt(i++));
            }
        }
        return sb.toString();
    }
}
// Replaces the six supported HTML entities in `text` with their characters:
// &quot; -> ", &apos; -> ', &amp; -> &, &gt; -> >, &lt; -> <, &frasl; -> /.
//
// The text is scanned once, left to right; output is never re-scanned, so
// "&amp;gt;" becomes "&gt;" rather than ">". An '&' that does not start a
// supported entity is copied through unchanged.
std::string entityParser(std::string text) {
    // Built once; the contents never change between calls.
    static const std::vector<std::pair<std::string, std::string>> table = {
        {"&quot;", "\""}, {"&apos;", "'"}, {"&amp;", "&"},
        {"&gt;", ">"},    {"&lt;", "<"},   {"&frasl;", "/"}
    };
    std::string out;
    std::size_t i = 0;
    const std::size_t n = text.size();
    while (i < n) {
        if (text[i] == '&') {
            bool matched = false;
            for (const auto& [ent, ch] : table) {
                // compare() clamps the length at the end of `text`, so a
                // truncated entity near the end simply fails to match.
                if (text.compare(i, ent.size(), ent) == 0) {
                    out += ch;
                    i += ent.size();  // jump past the whole entity
                    matched = true;
                    break;
                }
            }
            if (!matched) {
                // No entity begins here: the '&' is a literal character.
                out += '&';
                ++i;
            }
        } else {
            out += text[i++];
        }
    }
    return out;
}
Explanation
We walk through the text once and swap any known HTML entity (like &gt;) for the character it stands for. A small lookup table maps each entity name to its single character.
We keep an index i and build the answer in out. Whenever we hit an &, we test the table entries to see if the text starts with one of them at this spot. If it does, we append the matched character and jump i forward past the whole entity. If no entity matches, the & is just a literal, so we copy it and move one step. Any non-& character is copied straight across.
This works because every replacement only ever begins at an &, so we never accidentally rewrite ordinary text — only registered entity names get converted.
Example: "&gt; is an HTML entity but &ambassador; is not.". The &gt; matches and becomes >, but &ambassador; matches nothing in the table, so it is left exactly as written.
Each character is visited a constant number of times (the table is a fixed small size), so the scan is linear in the length of the text.