可以在构造 DataFrame 之前先把多行折叠成一行。做法是为每个组维护一个 `column_name -> value` 的 dict:当新的组开始时,把当前 dict 作为一行追加到结果列表中,然后为新组重新创建一个 dict。别忘了在循环结束后把最后一个组也追加为一行。
# Collapse the per-animal lines of each group into a single dict
# (column name -> value) so that every group becomes one DataFrame row.
extract = []  # finished rows, one dict per group
row = None  # dict of the group currently being read; None before the first header
for line in data.splitlines():
    if 'Group' in line:
        if row is not None:  # flush the previous group before starting a new one
            extract.append(row)
        group = line.split()[0]
        row = {'group': group}  # new group starts - begin a fresh dict
    if 'dog' in line or 'cat' in line or 'owl' in line:
        # An animal line must split into exactly three fields; only the
        # first value is stored under the animal's column.
        animal, val1, val2 = line.split()
        row[animal] = val1
if row is not None:  # the final group has no following header to flush it
    extract.append(row)
# Passing the column list to the constructor fixes the column order and fills
# an animal that never occurs with NaN, instead of raising KeyError the way a
# later df[[...]] selection would.
df = pd.DataFrame(extract, columns=['group', 'dog', 'cat', 'owl'])
print(df)
输出:
    group dog cat  owl
0  Group1  10  21  NaN
1  Group2  23  45   24