Given df
df = pd.DataFrame([[1, 5, 2, 8, 2], [2, 4, 4, 20, 2], [3, 3, 1, 20, 2], [4, 2, 2, 1, 3], [5, 1, 4, -5, -4], [1, 5, 2, 2, -20],
[2, 4, 4, 3, -8], [3, 3, 1, -1, -1], [4, 2, 2, 0, 12], [5, 1, 4, 20, -2]],
columns=['A', 'B', 'C', 'D', 'E'], index=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
Based on this answer, I created a function to calculate streaks (up, down).
def streaks(df, column):
#Create sign column
df['sign'] = 0
df.loc[df[column] > 0, 'sign'] = 1
df.loc[df[column] < 0, 'sign'] = 0
# Downstreak
df['d_streak2'] = (df['sign'] == 0).cumsum()
df['cumsum'] = np.nan
df.loc[df['sign'] == 1, 'cumsum'] = df['d_streak2']
df['cumsum'] = df['cumsum'].fillna(method='ffill')
df['cumsum'] = df['cumsum'].fillna(0)
df['d_streak'] = df['d_streak2'] - df['cumsum']
df.drop(['d_streak2', 'cumsum'], axis=1, inplace=True)
# Upstreak
df['u_streak2'] = (df['sign'] == 1).cumsum()
df['cumsum'] = np.nan
df.loc[df['sign'] == 0, 'cumsum'] = df['u_streak2']
df['cumsum'] = df['cumsum'].fillna(method='ffill')
df['cumsum'] = df['cumsum'].fillna(0)
df['u_streak'] = df['u_streak2'] - df['cumsum']
df.drop(['u_streak2', 'cumsum'], axis=1, inplace=True)
del df['sign']
return df
The function works well, however is very long. I'm sure there's a much betterway to write this. I tried the other answer in but didn't work well.
This is the desired output
streaks(df, 'E')
A B C D E d_streak u_streak
1 1 5 2 8 2 0.0 1.0
2 2 4 4 20 2 0.0 2.0
3 3 3 1 20 2 0.0 3.0
4 4 2 2 1 3 0.0 4.0
5 5 1 4 -5 -4 1.0 0.0
6 1 5 2 2 -20 2.0 0.0
7 2 4 4 3 -8 3.0 0.0
8 3 3 1 -1 -1 4.0 0.0
9 4 2 2 0 12 0.0 1.0
10 5 1 4 20 -2 1.0 0.0
See Question&Answers more detail:
os