这类问题很适合用生成器（generator）来处理：
代码:
def find_triplets(data):
    """Group a flat sequence of lines into ``(name, officer, demo)`` triplets.

    Each record consists of a name line followed by a demographic line. If
    the line right after the name is exactly ``'OFFICER'``, the record is
    flagged as an officer and the demographic line is the one after that.

    Args:
        data: Iterable of strings (any iterable works; it is consumed lazily).

    Yields:
        Tuples ``(name, officer, demo)`` where ``officer`` is a bool.

    A truncated trailing record (input ends before its demographic line)
    ends iteration without yielding that record.
    """
    lines = iter(data)
    exhausted = object()  # unique marker returned by next() at end of input

    # Iterating with ``for`` lets normal exhaustion end the generator. A bare
    # ``next(lines)`` here would leak StopIteration, which PEP 479 turns into
    # a RuntimeError on Python 3.7+.
    for name in lines:
        demo = next(lines, exhausted)
        officer = demo == 'OFFICER'
        if officer:
            demo = next(lines, exhausted)
        if demo is exhausted:
            # Input ended in the middle of a record: stop cleanly.
            return
        yield name, officer, demo
测试代码:
# Sample input: a name line, an optional 'OFFICER' marker line, then a
# 'race / gender / age' line for each person.
info = [
    'Joe Schmoe', 'W / M / 64',
    'Lillian Schmoe', 'W / F / 60',
    'Richard Johnson', 'OFFICER', 'W / M /48',
    'Adrian Stevens', '? / ? / 27',
]

# Print every parsed (name, officer, demo) triplet, one per line.
for triplet in find_triplets(info):
    print(triplet)
结果:
('Joe Schmoe', False, 'W / M / 64')
('Lillian Schmoe', False, 'W / F / 60')
('Richard Johnson', True, 'W / M /48')
('Adrian Stevens', False, '? / ? / 27')
将这些三元组转换为 dict：
import re
def fix_demographic(info):
    """Parse a ``race / gender / age`` string into a dict of its fields.

    Every run of ``?`` is replaced by the string ``'NaN'`` (kept as text
    rather than ``np.nan`` for now), for example::

        'W / M / ??'  ->  W / M / NaN
        '?/M/?'       ->  NaN / M / NaN

    Args:
        info: String holding three fields separated by ``/``; whitespace
            around the separators and around the whole string is ignored.

    Returns:
        ``dict`` with the string-valued keys ``race``, ``gender`` and ``age``.

    Raises:
        ValueError: If ``info`` does not split into exactly three fields.
    """
    # Raw strings keep ``\?`` and ``\s`` as regex escapes; in a normal string
    # literal they are invalid escape sequences and trigger warnings.
    cleaned = re.sub(r'\?+', 'NaN', info.strip())
    race, gender, age = re.split(r'\s*/\s*', cleaned)
    return dict(race=race, gender=gender, age=age)
# Map each person's name to a record holding the officer flag plus the
# parsed race / gender / age fields.
data_dict = dict(
    (person, dict(officer=is_officer, **fix_demographic(raw_demo)))
    for person, is_officer, raw_demo in find_triplets(info)
)
print(data_dict)
结果:
{
'Joe Schmoe': {'officer': False, 'race': 'W', 'gender': 'M', 'age': '64'},
'Lillian Schmoe': {'officer': False, 'race': 'W', 'gender': 'F', 'age': '60'},
'Richard Johnson': {'officer': True, 'race': 'W', 'gender': 'M', 'age': '48'},
'Adrian Stevens': {'officer': False, 'race': 'NaN', 'gender': 'NaN', 'age': '27'}
}