| comments | difficulty | edit_url | tags |
|---|---|---|---|
| true | 简单 | | |
给你一个字符串 paragraph 和一个表示禁用词的字符串数组 banned ,返回出现频率最高的非禁用词。题目数据 保证 至少存在一个非禁用词,且答案 唯一 。
paragraph 中的单词 不区分大小写 ,答案应以 小写 形式返回。
示例 1:
输入:paragraph = "Bob hit a ball, the hit BALL flew far after it was hit.", banned = ["hit"]
输出:"ball"
解释:
"hit" 出现了 3 次,但它是禁用词。
"ball" 出现了两次(没有其他单词出现这么多次),因此它是段落中出现频率最高的非禁用词。
请注意,段落中的单词不区分大小写,标点符号会被忽略(即使它们紧挨着单词,如 "ball,"),并且尽管 "hit" 出现的次数更多,但它不能作为答案,因为它是禁用词。
示例 2:
输入:paragraph = "a.", banned = []
输出:"a"
提示:
1 <= paragraph.length <= 1000
paragraph 由英文字母、空格 ' ' 和以下符号组成:"!?',;."
0 <= banned.length <= 100
1 <= banned[i].length <= 10
banned[i] 仅由小写英文字母组成
正则匹配(或双指针)找出所有单词,用哈希表统计每个单词出现的频率,然后找到未在 banned 中出现且出现频率最高的单词。
class Solution:
    """LeetCode 819: most frequent non-banned word in a paragraph."""

    def mostCommonWord(self, paragraph: str, banned: List[str]) -> str:
        """Return the most frequent word of ``paragraph`` that is not banned.

        Words are maximal runs of ASCII letters, compared case-insensitively;
        every other character acts as a separator.

        Args:
            paragraph: Text to scan.
            banned: Words to ignore; compared against the lowercased words.

        Returns:
            The winning word in lowercase. On a tie, the word that appears
            first in the paragraph wins.

        Raises:
            ValueError: If the paragraph contains no non-banned word.
        """
        banned_words = set(banned)
        # Drop banned words before counting so the final selection never has
        # to look at them.
        counts = Counter(
            word
            for word in re.findall('[a-z]+', paragraph.lower())
            if word not in banned_words
        )
        # A single linear scan is enough; sorting every entry is unnecessary.
        return max(counts, key=counts.__getitem__)
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * LeetCode 819: finds the most frequent word of a paragraph that is not banned.
 */
class Solution {
    /** Matches one maximal run of lowercase ASCII letters. */
    private static final Pattern WORD = Pattern.compile("[a-z]+");

    /**
     * Returns the most frequent non-banned word of {@code paragraph}.
     *
     * <p>Words are runs of ASCII letters compared case-insensitively; every
     * other character is a separator.
     *
     * @param paragraph text to scan
     * @param banned    words to ignore, compared against the lowercased words
     * @return the winning word in lowercase, or {@code null} when the
     *         paragraph contains no non-banned word
     */
    public String mostCommonWord(String paragraph, String[] banned) {
        Set<String> bannedWords = new HashSet<>();
        for (String word : banned) {
            bannedWords.add(word);
        }

        Map<String, Integer> counter = new HashMap<>();
        String ans = null;
        int max = 0;
        // Locale.ROOT keeps the mapping locale-independent: under a Turkish
        // default locale "I" would lowercase to dotless "ı" and stop matching
        // the [a-z] pattern.
        Matcher matcher = WORD.matcher(paragraph.toLowerCase(Locale.ROOT));
        while (matcher.find()) {
            String word = matcher.group();
            if (bannedWords.contains(word)) {
                continue;
            }
            // merge() returns the updated count, so the running maximum can
            // be tracked without a second pass over the map.
            int count = counter.merge(word, 1, Integer::sum);
            if (count > max) {
                max = count;
                ans = word;
            }
        }
        return ans;
    }
}
// LeetCode 819: finds the most frequent word of a paragraph that is not banned.
class Solution {
public:
    // Returns the most frequent non-banned word of `paragraph`, lowercased.
    //
    // Words are maximal runs of alphabetic characters; every other character
    // is a separator. `banned` entries are compared against the lowercased
    // words. Returns an empty string when no non-banned word exists. On a tie
    // the word that reaches the highest count first wins.
    string mostCommonWord(string paragraph, vector<string>& banned) {
        unordered_set<string> bannedWords(banned.begin(), banned.end());
        unordered_map<string, int> counter;
        string ans;
        int best = 0;
        const int n = static_cast<int>(paragraph.size());
        int i = 0;
        while (i < n) {
            // <cctype> functions require a value representable as unsigned
            // char; passing a negative plain char is undefined behaviour.
            if (!isalpha(static_cast<unsigned char>(paragraph[i]))) {
                ++i;
                continue;
            }
            string word;
            while (i < n && isalpha(static_cast<unsigned char>(paragraph[i]))) {
                word.push_back(
                    static_cast<char>(tolower(static_cast<unsigned char>(paragraph[i]))));
                ++i;
            }
            if (bannedWords.count(word)) {
                continue;
            }
            // One map lookup per word: increment and read in the same step.
            const int count = ++counter[word];
            if (count > best) {
                best = count;
                ans = word;
            }
        }
        return ans;
    }
};
// mostCommonWord returns the most frequent word of paragraph that does not
// appear in banned, in lowercase.
//
// The paragraph is examined one byte at a time: a word is a maximal run of
// bytes that unicode.IsLetter accepts, and any other byte ends the current
// word. On a tie the word that reaches the highest count first wins. An empty
// string is returned when no non-banned word is found.
func mostCommonWord(paragraph string, banned []string) string {
	blocked := make(map[string]bool)
	for _, b := range banned {
		blocked[b] = true
	}

	freq := make(map[string]int)
	best, bestCount := "", 0
	var buf []byte

	// flush records the word accumulated in buf (if any) and resets the buffer.
	flush := func() {
		if len(buf) == 0 {
			return
		}
		w := string(buf)
		buf = buf[:0]
		if blocked[w] {
			return
		}
		freq[w]++
		if freq[w] > bestCount {
			best, bestCount = w, freq[w]
		}
	}

	for k := 0; k < len(paragraph); k++ {
		r := rune(paragraph[k])
		if unicode.IsLetter(r) {
			buf = append(buf, byte(unicode.ToLower(r)))
			continue
		}
		flush()
	}
	// The paragraph may end in the middle of a word.
	flush()
	return best
}
/**
 * Returns the most frequent word of `paragraph` that is not in `banned`.
 *
 * Words are maximal runs of ASCII letters compared case-insensitively; every
 * other character is a separator.
 *
 * @param paragraph Text to scan.
 * @param banned Words to ignore, compared against the lowercased words.
 * @returns The winning word in lowercase, or `''` when no non-banned word
 *          exists. On a tie the word seen first in the paragraph wins.
 */
function mostCommonWord(paragraph: string, banned: string[]): string {
    const bannedWords = new Set<string>(banned);
    const counts = new Map<string, number>();

    // `toLowerCase` is locale-independent, unlike `toLocaleLowerCase`.
    // The class is `[^a-z]`, not `[^A-z]`: the range A-z also covers
    // `[ \ ] ^ _` and the backtick, which would be kept as word characters.
    for (const word of paragraph.toLowerCase().split(/[^a-z]+/)) {
        if (word === '' || bannedWords.has(word)) {
            continue;
        }
        counts.set(word, (counts.get(word) ?? 0) + 1);
    }

    let best = '';
    let bestCount = 0;
    // Map iteration follows insertion order, so a strict comparison keeps the
    // earliest word on ties.
    for (const [word, count] of counts) {
        if (count > bestCount) {
            best = word;
            bestCount = count;
        }
    }
    return best;
}
use std::collections::{HashMap, HashSet};
impl Solution {
    /// Returns the most frequent word of `paragraph` that is not in `banned`.
    ///
    /// The paragraph is ASCII-lowercased in place; a word is a maximal run of
    /// the characters `'a'..='z'`, and every other character is a separator.
    /// `banned` entries are compared against the lowercased words.
    ///
    /// # Panics
    ///
    /// Panics if `paragraph` contains no non-banned word.
    pub fn most_common_word(mut paragraph: String, banned: Vec<String>) -> String {
        paragraph.make_ascii_lowercase();
        let banned: HashSet<&str> = banned.iter().map(String::as_str).collect();

        let words = paragraph
            .split(|c: char| !c.is_ascii_lowercase())
            .filter(|word| !word.is_empty() && !banned.contains(word));

        let mut counts: HashMap<&str, usize> = HashMap::new();
        for word in words {
            // The entry API hashes each word once instead of `get` + `insert`.
            *counts.entry(word).or_default() += 1;
        }

        counts
            .into_iter()
            .max_by_key(|&(_, count)| count)
            .map(|(word, _)| word.to_owned())
            .expect("paragraph must contain at least one non-banned word")
    }
}