这不是最优雅的解决方案,但对您的示例有效——前提是日期行之后的每条记录始终由三部分信息组成(例如 PwrSh、EncCmndTest-A、[Trj])。
import pandas as pd
import re
# Sample input: a "<date> - <id>" header line followed by a line listing the
# entries that belong to that date.
raw = [
    '28.03.2018 - 180328-2',
    'PwrSh:EncCmndTest-A [Trj]',
    '25.03.2018 - 180325-0',
    'ELF:Mirai-MR [Trj], MacOS:Miner-A [Trj], Android:SpyAgent-ZW [Trj], Android:SpyAgent-ZX [Trj]',
]

# Every entry is assumed to consist of exactly this many tokens once the
# line has been split (e.g. "ELF", "Mirai-MR", "[Trj]").
fields_per_record = 3

# Tokens are separated by a comma, a space or a colon; compiled once so the
# loop below does not look the pattern up on every line.
separators = re.compile(r'[, :]')

# Most recently seen date; stays None until the first header line is read.
date = None

# read_csv() below decodes UTF-8 by default, so write with the same encoding
# rather than the platform default.
with open('converted.csv', 'w', encoding='utf-8') as myfile:
    for line in raw:
        # Header lines start with a digit; slicing (instead of indexing)
        # keeps an empty line from raising IndexError.
        if line[:1].isdigit():
            date = line.split(' - ')[0]
            continue

        # Consecutive separators (", ") produce empty strings - drop them.
        info = [token for token in separators.split(line) if token]
        record_count = len(info) // fields_per_record
        if record_count and date is None:
            raise ValueError(
                'entry line encountered before any date line: %r' % line
            )

        # One CSV row per complete group of tokens, prefixed with the date;
        # a trailing incomplete group is ignored.
        for n in range(record_count):
            record = info[fields_per_record * n:fields_per_record * (n + 1)]
            myfile.write(','.join([date] + record) + '\n')

# The file has no header row, so let pandas number the columns 0..3.
df = pd.read_csv('converted.csv', header=None)
print(df)
输出:
            0        1              2      3
0  28.03.2018    PwrSh  EncCmndTest-A  [Trj]
1  25.03.2018      ELF       Mirai-MR  [Trj]
2  25.03.2018    MacOS        Miner-A  [Trj]
3  25.03.2018  Android    SpyAgent-ZW  [Trj]
4  25.03.2018  Android    SpyAgent-ZX  [Trj]