使用 collections.Counter 对展平后的列表进行计数,并用集合(set)忽略同一子列表内的重复值:
# Count, for every word, how many sub-lists of `arr` contain it; wrapping each
# sub-list in set() makes repeats inside one sub-list count only once.
appearances = Counter(word for sub in arr for word in set(sub))
# Counter({'hola': 4, 'hello': 3, 'ciao': 3, 'bonjour': 2, 'namaste': 2})
# Rebuild each sub-list, keeping only the words whose count reaches `threshold`.
[[word for word in sub if appearances[word] >= threshold] for sub in arr]
from collections import Counter
def threshold_filter(arr, threshold):
    """Drop items that occur in fewer than ``threshold`` sub-lists.

    Args:
        arr: A sequence of sub-lists of hashable items (e.g. words). It is
            traversed twice, so it must be re-iterable rather than a
            one-shot iterator.
        threshold: Minimum number of sub-lists an item has to appear in
            for it to be kept.

    Returns:
        A new list with one inner list per sub-list of ``arr``. Each inner
        list holds the surviving items in their original order; repeats
        inside a sub-list are preserved.
    """
    # set(sub) collapses repeats inside one sub-list, so each item is
    # counted at most once per sub-list.
    appearances = Counter(word for sub in arr for word in set(sub))
    return [
        [word for word in sub if appearances[word] >= threshold]
        for sub in arr
    ]
# NOTE(review): `test` is not defined in this snippet; presumably it is the
# sample list of word sub-lists used for the example -- confirm.
print(threshold_filter(test, 3))
# Result
[['hello', 'hola'], ['hello', 'hola'], ['hello', 'ciao'], ['hola', 'ciao'], ['hola', 'ciao'], []]