groupby
dataframe
'ID'
'A'
condition
import pandas as pd
import datetime as dt
d = {
'ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2], # Assuming only two unique IDs for simplicity
'DATE': [ # Dates already sorted, but it would work anyways
dt.date(2018, 7, 19), dt.date(2018, 8, 18),
dt.date(2018, 9, 17), dt.date(2018, 10, 17),
dt.date(2018, 11, 16), dt.date(2018, 7, 19),
dt.date(2018, 8, 18), dt.date(2018, 9, 17),
dt.date(2018, 10, 17), dt.date(2018, 11, 16)
],
'condition': ['B', 'B', 'B', 'A', 'B', 'B', 'B', 'B', 'A', 'B']
}
# 'DATE' but with list comprehension:
# [dt.date.today() + dt.timedelta(days=30*x) for y in range(0, 2) for x in range(0, 5)]
df = pd.DataFrame(d)
>>> (df.sort_values(by='DATE') # we should call pd.to_datetime() first if...
... .groupby('ID') # 'DATE' is not datetime already
... .apply(lambda x: x[(x['condition'].values == 'A').argmax():]))
ID DATE condition
ID
1 3 1 2018-10-17 A
4 1 2018-11-16 B
2 8 2 2018-10-17 A
9 2 2018-11-16 B
reset_index(drop=True)
ID DATE condition
0 1 2018-10-17 A
1 1 2018-11-16 B
2 2 2018-10-17 A
3 2 2018-11-16 B
(x['condition'].values == 'A')
bool
np.array
argmax()
True
condition == 'A'
slice
d = {
'ID': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2], # Assuming only two unique IDs for simplicity
'DATE': [ # Dates already sorted, but it would work anyways
dt.date(2018, 7, 19), dt.date(2018, 8, 18),
dt.date(2018, 9, 17), dt.date(2018, 10, 17),
dt.date(2018, 11, 16), dt.date(2018, 7, 19),
dt.date(2018, 8, 18), dt.date(2018, 9, 17),
dt.date(2018, 10, 17), dt.date(2018, 11, 16)
], # ID 1 only contains 'B'
'condition': ['B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B']
}
df = pd.DataFrame(d)
>>> df
ID DATE condition
0 1 2018-07-19 B
1 1 2018-08-18 B
2 1 2018-09-17 B
3 1 2018-10-17 B
4 1 2018-11-16 B
5 2 2018-07-19 B
6 2 2018-08-18 B
7 2 2018-09-17 B
8 2 2018-10-17 A
9 2 2018-11-16 B
>>> (df.sort_values(by='DATE')
... .groupby('ID')
... .filter(lambda x: (x['condition'] == 'A').any())
... .groupby('ID')
... .apply(lambda x: x[(x['condition'].values == 'A').argmax():]))
ID DATE condition
ID
2 8 2 2018-10-17 A
9 2 2018-11-16 B