I have a DataFrame (df) and a dictionary of lists of dictionaries (rf), as shown below.
df:
Date Tea_Good Tea_bad coffee_good coffee_bad
2020-02-01 3 1 10 7
2020-02-02 3 1 10 7
2020-02-03 3 1 10 7
2020-02-04 3 1 10 7
2020-02-05 6 1 10 7
2020-02-06 6 2 10 11
2020-02-07 6 2 5 11
2020-02-08 6 2 5 11
2020-02-09 9 2 5 11
2020-02-10 9 2 4 11
2020-02-11 9 2 4 11
2020-02-12 9 2 4 11
2020-02-13 9 2 4 11
2020-02-14 9 2 4 11
dictionary:
# Sample request object: one list of rule dictionaries per beverage key.
# Every rule carries a "type", a "from"/"to" timestamp pair, a "days" count,
# a six-element "coef" list and a "case" of either "good" or "bad".
rf = {
    "tea": [
        {
            "type": "df",
            "from": "2020-02-01T20:00:00.000Z",
            "to": "2020-02-03T20:00:00.000Z",
            "days": 3,
            "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
            "case": "bad",
        },
        {
            "type": "polynomial",
            "from": "2020-02-08T20:00:00.000Z",
            "to": "2020-02-10T20:00:00.000Z",
            "days": 3,
            "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
            "case": "good",
        },
        {
            "type": "linear",
            "from": "2020-02-01T20:00:00.000Z",
            "to": "2020-02-03T20:00:00.000Z",
            "days": 3,
            "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
            "case": "bad",
        },
        {
            "type": "constant",
            "from": "2020-02-04T20:00:00.000Z",
            "to": "2020-02-05T20:00:00.000Z",
            "days": 2,
            "coef": [10, 10, 10, 10, 10, 10],
            "case": "good",
        },
    ],
    "coffee": [
        {
            "type": "quadratic",
            "from": "2020-02-01T20:00:00.000Z",
            "to": "2020-02-10T20:00:00.000Z",
            "days": 10,
            "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
            "case": "good",
        },
        {
            "type": "df",
            "from": "2020-02-11T20:00:00.000Z",
            "to": "2020-02-13T20:00:00.000Z",
            "days": 5,
            "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
            "case": "bad",
        },
        {
            "type": "linear",
            "from": "2020-02-01T20:00:00.000Z",
            "to": "2020-02-03T20:00:00.000Z",
            "days": 3,
            "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
            "case": "good",
        },
        {
            "type": "linear",
            "from": "2020-02-03T20:00:00.000Z",
            "to": "2020-02-06T20:00:00.000Z",
            "days": 4,
            "coef": [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
            "case": "bad",
        },
    ],
}
I have to update the columns of df based on the values in the dictionary.
The default value of end_date, start_date and n_days is 0,
but to apply an update at least two of them must be non-zero.
If all three are non-zero, use start_date and end_date and calculate n_days as shown below:
n_days = end_date - start_date
and then update the df.
If any two of them are zero, return df unchanged for that condition.
Below are all the conditions related to end_date, start_date and n_days:
# Resolve the update window from start_date, end_date and n_days, where a
# value of 0 stands for "not supplied".
# NOTE(review): the original indentation of this snippet was lost; the
# comments below describe each condition on its own — confirm nesting.
# Neither date supplied: return df unchanged.
if (start_date == 0) & (end_date == 0):
return df
# Only end_date supplied: return df unchanged.
if (start_date == 0) & (end_date != 0) & (n_days == 0):
return df
# Only start_date supplied: return df unchanged.
if (start_date != 0) & (end_date == 0) & (n_days == 0):
return df
# if start date, end date and n_days are non zero then consider start date and n_days
# NOTE(review): the description preceding this snippet says n_days should be
# derived as end_date - start_date in this case; this branch instead keeps
# n_days and overwrites end_date — confirm which is intended.
if (start_date != 0) & (end_date != 0) & (n_days != 0):
#n_days = (end_date - start_date).days
#n_days = (end_date - start_date).days
end_date = start_date + DT.timedelta(days=n_days)
# Both dates supplied but no n_days: derive n_days from the dates.
# NOTE(review): without `.days` this is a timedelta when both operands are
# datetimes; the final `DT.timedelta(days=n_days)` check below would then
# receive a timedelta rather than a number — confirm.
if (start_date != 0) & (end_date != 0) & (n_days == 0) :
n_days = (end_date - start_date)
print(f" n day = {n_days}")
# Self-assignment: leaves end_date as it was.
end_date = end_date
# start_date and n_days supplied: end_date is start_date plus n_days days.
if (start_date != 0) & (end_date == 0) & (n_days != 0) :
#n_days = (end_date - start_date)
#print(f" n day = {n_days}")
end_date = start_date + DT.timedelta(days=n_days)
# end_date and n_days supplied: start_date is end_date minus n_days days.
if (start_date == 0) & (end_date != 0) & (n_days != 0) :
start_date = end_date - DT.timedelta(days=n_days)
# Whenever n_days and start_date are both non-zero, end_date is (re)computed
# from them; this repeats the assignment already made in the branches above.
if (n_days != 0) & (start_date != 0):
end_date = start_date + DT.timedelta(days=n_days)
I tried the code below.
def rf_user_input(df, REQUEST_OBJ):
'''
This functions returns the tea_coffee dataframe with the user input functions for tea, coffee
params: data : tea_coffee dataframe uploaded from user
request_object_api: The api should contain the below params
start_date: start date of the user function for rf
end_date : end date of the user function for the rf
label : {'constant', 'linear', 'quadratic', 'polynomial', 'exponential', 'df'}
coef : list with 6 indexes [a0,a1,a2,a3,a4,a5]
return: rf computed with user inputs
'''
# df.days.iloc[(df[df.Date==start_date].index[0])]
df = df.sort_values(by='Date')
df['days'] = (df['Date'] - df.at[0, 'Date']).dt.days + 1
REQUIRED_KEYS = ["tea", "coffee"]
for teacoffee_category in REQUIRED_KEYS:
print(f" teacoffee_category - {teacoffee_category}")
if teacoffee_category in REQUEST_OBJ.keys():
param_obj_list = REQUEST_OBJ[teacoffee_category]
for params_obj in param_obj_list:
# Do the data processing
goodbad_catgeory = params_obj['case']
kind = teacoffee_category + '_' + goodbad_catgeory
start_date, end_date, label, coef, n_days = params_obj['from'], params_obj['to'], params_obj['type'],
params_obj['coef'], params_obj['days']
start_date = DT.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%S.%fZ")
end_date = DT.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S.%fZ")
print(f" start date - {start_date}")
print(f" end date - {end_date}")
# Additional n_days code - Start
first_date = df['Date'].min()
period_days = (start_date - first_date)
print(f" period day - {period_days}")
# Additional n_days code - End
# Checking 'start_date' , 'end_date' and 'n_days' conditions
# If the start_date and end_date is null return the calibration df as it is
if (start_date == 0) & (end_date == 0):
return df
if (start_date == 0) & (end_date != 0) & (n_days == 0):
return df
if (start_date != 0) & (end_date == 0) & (n_days == 0):
return df
# if start date, end date and n_days are non zero then consider start date and n_days
if (start_date != 0) & (end_date != 0) & (n_days != 0):
#n_days = (end_date - start_date).days
#n_days = (end_date - start_date).days
end_date = start_date + DT.timedelta(days=n_days)
if (start_date != 0) & (end_date != 0) & (n_days == 0) :
n_days = (end_date - start_date)
print(f" n day = {n_days}")
end_date = end_date
if (start_date != 0) & (end_date == 0) & (n_days != 0) :
#n_days = (end_date - start_date)
#print(f" n day = {n_days}")
end_date = start_date + DT.timedelta(days=n_days)
if (start_date == 0) & (end_date != 0) & (n_days != 0) :
start_date = end_date - DT.timedelta(days=n_days)
if (n_days != 0) & (start_date != 0):
end_date = start_date + DT.timedelta(days=n_days)
# If the start_date and end_date is null return the calibration df as it is
if len(coef) == 6:
# Coefficients Index Initializations
a0 = coef[0]
a1 = coef[1]
a2 = coef[2]
a3 = coef[3]
a4 = coef[4]
a5 = coef[5]
# Constant
if label == 'constant':
if kind == 'tea_good':
df.loc[
(df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = a0 + (df['days']) - period_days
elif kind == 'tea_bad':
df.loc[
(df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_bad'] = a0 + df['days'] - period_days
elif kind == 'coffee_good':
df.loc[
(df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_good'] = a0 + df['days'] - period_days
elif kind == 'coffee_bad':
df.loc[
(df['Date'] >= start_date) & (df['Date'] <= end_date), 'coffee_bad'] = a0 + df['days'] - period_days
# Linear
if label == 'linear':
if kind == 'tea_good':
df.loc[
(df['Date'] >= start_date) & (df['Date'] <= end_date), 'Tea_Good'] = a0 + (
a1 * ((df['days']) - period_days))